perf/core improvements and fixes:

perf c2c:
 
   Jiri Olsa:
 
   - Change the default coalesce setup to from '--coalesce pid,iaddr' to just '--coalesce iaddr'.
 
   - Increase the HITM ratio limit for displayed cachelines.
 
 perf script:
 
   Andi Kleen:
 
   - Fix LBR skid dump problems in brstackinsn.
 
 perf trace:
 
   Arnaldo Carvalho de Melo:
 
   - Check if the raw_syscalls:sys_{enter,exit} are setup before setting tp filter.
 
   - Do not hardcode the size of the tracepoint common_ fields.
 
   - Beautify USBDEFFS_ ioctl commands.
 
   Colin Ian King:
 
   - Use correct SECCOMP prefix spelling, "SECOMP_*" -> "SECCOMP_*".
 
 perf python:
 
   Jiri Olsa:
 
   - Do not force closing original perf descriptor in evlist.get_pollfd().
 
 tools misc:
 
   Jiri Olsa:
 
   - Allow overriding CFLAGS and LDFLAGS.
 
 perf build:
 
   Stanislav Fomichev:
 
   - Don't unconditionally link the libbfd feature test to -liberty and -lz
 
 thread-stack:
 
   Adrian Hunter:
 
   - Fix processing for the idle task, having a stack per cpu.
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCXC4BOgAKCRCyPKLppCJ+
 Jy1BAP0Vr6fC+mv/ul0x3WC4dlF0UG9p9+GxBoKsXPpG5vojCgEAqX7F+Pmx+6HK
 FIBjbOWIL5NYRViskwPlQy5+qkKmJgA=
 =I4gE
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-for-mingo-4.21-20190103' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

perf c2c:

  Jiri Olsa:

  - Change the default coalesce setup to from '--coalesce pid,iaddr' to just '--coalesce iaddr'.

  - Increase the HITM ratio limit for displayed cachelines.

perf script:

  Andi Kleen:

  - Fix LBR skid dump problems in brstackinsn.

perf trace:

  Arnaldo Carvalho de Melo:

  - Check if the raw_syscalls:sys_{enter,exit} are setup before setting tp filter.

  - Do not hardcode the size of the tracepoint common_ fields.

  - Beautify USBDEFFS_ ioctl commands.

  Colin Ian King:

  - Use correct SECCOMP prefix spelling, "SECOMP_*" -> "SECCOMP_*".

perf python:

  Jiri Olsa:

  - Do not force closing original perf descriptor in evlist.get_pollfd().

tools misc:

  Jiri Olsa:

  - Allow overriding CFLAGS and LDFLAGS.

perf build:

  Stanislav Fomichev:

  - Don't unconditionally link the libbfd feature test to -liberty and -lz

thread-stack:

  Adrian Hunter:

  - Fix processing for the idle task, having a stack per cpu.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2019-01-03 14:05:16 +01:00
commit 2573be22e5
27 changed files with 623 additions and 169 deletions

View File

@ -82,8 +82,8 @@ FEATURE_TESTS_EXTRA := \
cplus-demangle \
hello \
libbabeltrace \
liberty \
liberty-z \
libbfd-liberty \
libbfd-liberty-z \
libunwind-debug-frame \
libunwind-debug-frame-arm \
libunwind-debug-frame-aarch64 \

View File

@ -17,8 +17,8 @@ FILES= \
test-libbfd.bin \
test-disassembler-four-args.bin \
test-reallocarray.bin \
test-liberty.bin \
test-liberty-z.bin \
test-libbfd-liberty.bin \
test-libbfd-liberty-z.bin \
test-cplus-demangle.bin \
test-libelf.bin \
test-libelf-getphdrnum.bin \
@ -210,7 +210,7 @@ $(OUTPUT)test-libpython-version.bin:
$(BUILD)
$(OUTPUT)test-libbfd.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl
$(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
$(OUTPUT)test-disassembler-four-args.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
@ -218,10 +218,10 @@ $(OUTPUT)test-disassembler-four-args.bin:
$(OUTPUT)test-reallocarray.bin:
$(BUILD)
$(OUTPUT)test-liberty.bin:
$(OUTPUT)test-libbfd-liberty.bin:
$(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty
$(OUTPUT)test-liberty-z.bin:
$(OUTPUT)test-libbfd-liberty-z.bin:
$(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty -lz
$(OUTPUT)test-cplus-demangle.bin:

View File

@ -12,7 +12,7 @@ endif
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon
ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))

View File

@ -0,0 +1,201 @@
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/*****************************************************************************/
/*
* usbdevice_fs.h -- USB device file system.
*
* Copyright (C) 2000
* Thomas Sailer (sailer@ife.ee.ethz.ch)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* History:
* 0.1 04.01.2000 Created
*/
/*****************************************************************************/
#ifndef _UAPI_LINUX_USBDEVICE_FS_H
#define _UAPI_LINUX_USBDEVICE_FS_H
#include <linux/types.h>
#include <linux/magic.h>
/* --------------------------------------------------------------------- */
/* usbdevfs ioctl codes */
struct usbdevfs_ctrltransfer {
__u8 bRequestType;
__u8 bRequest;
__u16 wValue;
__u16 wIndex;
__u16 wLength;
__u32 timeout; /* in milliseconds */
void __user *data;
};
struct usbdevfs_bulktransfer {
unsigned int ep;
unsigned int len;
unsigned int timeout; /* in milliseconds */
void __user *data;
};
struct usbdevfs_setinterface {
unsigned int interface;
unsigned int altsetting;
};
struct usbdevfs_disconnectsignal {
unsigned int signr;
void __user *context;
};
#define USBDEVFS_MAXDRIVERNAME 255
struct usbdevfs_getdriver {
unsigned int interface;
char driver[USBDEVFS_MAXDRIVERNAME + 1];
};
struct usbdevfs_connectinfo {
unsigned int devnum;
unsigned char slow;
};
#define USBDEVFS_URB_SHORT_NOT_OK 0x01
#define USBDEVFS_URB_ISO_ASAP 0x02
#define USBDEVFS_URB_BULK_CONTINUATION 0x04
#define USBDEVFS_URB_NO_FSBR 0x20 /* Not used */
#define USBDEVFS_URB_ZERO_PACKET 0x40
#define USBDEVFS_URB_NO_INTERRUPT 0x80
#define USBDEVFS_URB_TYPE_ISO 0
#define USBDEVFS_URB_TYPE_INTERRUPT 1
#define USBDEVFS_URB_TYPE_CONTROL 2
#define USBDEVFS_URB_TYPE_BULK 3
struct usbdevfs_iso_packet_desc {
unsigned int length;
unsigned int actual_length;
unsigned int status;
};
struct usbdevfs_urb {
unsigned char type;
unsigned char endpoint;
int status;
unsigned int flags;
void __user *buffer;
int buffer_length;
int actual_length;
int start_frame;
union {
int number_of_packets; /* Only used for isoc urbs */
unsigned int stream_id; /* Only used with bulk streams */
};
int error_count;
unsigned int signr; /* signal to be sent on completion,
or 0 if none should be sent. */
void __user *usercontext;
struct usbdevfs_iso_packet_desc iso_frame_desc[0];
};
/* ioctls for talking directly to drivers */
struct usbdevfs_ioctl {
int ifno; /* interface 0..N ; negative numbers reserved */
int ioctl_code; /* MUST encode size + direction of data so the
* macros in <asm/ioctl.h> give correct values */
void __user *data; /* param buffer (in, or out) */
};
/* You can do most things with hubs just through control messages,
* except find out what device connects to what port. */
struct usbdevfs_hub_portinfo {
char nports; /* number of downstream ports in this hub */
char port [127]; /* e.g. port 3 connects to device 27 */
};
/* System and bus capability flags */
#define USBDEVFS_CAP_ZERO_PACKET 0x01
#define USBDEVFS_CAP_BULK_CONTINUATION 0x02
#define USBDEVFS_CAP_NO_PACKET_SIZE_LIM 0x04
#define USBDEVFS_CAP_BULK_SCATTER_GATHER 0x08
#define USBDEVFS_CAP_REAP_AFTER_DISCONNECT 0x10
#define USBDEVFS_CAP_MMAP 0x20
#define USBDEVFS_CAP_DROP_PRIVILEGES 0x40
/* USBDEVFS_DISCONNECT_CLAIM flags & struct */
/* disconnect-and-claim if the driver matches the driver field */
#define USBDEVFS_DISCONNECT_CLAIM_IF_DRIVER 0x01
/* disconnect-and-claim except when the driver matches the driver field */
#define USBDEVFS_DISCONNECT_CLAIM_EXCEPT_DRIVER 0x02
struct usbdevfs_disconnect_claim {
unsigned int interface;
unsigned int flags;
char driver[USBDEVFS_MAXDRIVERNAME + 1];
};
struct usbdevfs_streams {
unsigned int num_streams; /* Not used by USBDEVFS_FREE_STREAMS */
unsigned int num_eps;
unsigned char eps[0];
};
/*
* USB_SPEED_* values returned by USBDEVFS_GET_SPEED are defined in
* linux/usb/ch9.h
*/
#define USBDEVFS_CONTROL _IOWR('U', 0, struct usbdevfs_ctrltransfer)
#define USBDEVFS_CONTROL32 _IOWR('U', 0, struct usbdevfs_ctrltransfer32)
#define USBDEVFS_BULK _IOWR('U', 2, struct usbdevfs_bulktransfer)
#define USBDEVFS_BULK32 _IOWR('U', 2, struct usbdevfs_bulktransfer32)
#define USBDEVFS_RESETEP _IOR('U', 3, unsigned int)
#define USBDEVFS_SETINTERFACE _IOR('U', 4, struct usbdevfs_setinterface)
#define USBDEVFS_SETCONFIGURATION _IOR('U', 5, unsigned int)
#define USBDEVFS_GETDRIVER _IOW('U', 8, struct usbdevfs_getdriver)
#define USBDEVFS_SUBMITURB _IOR('U', 10, struct usbdevfs_urb)
#define USBDEVFS_SUBMITURB32 _IOR('U', 10, struct usbdevfs_urb32)
#define USBDEVFS_DISCARDURB _IO('U', 11)
#define USBDEVFS_REAPURB _IOW('U', 12, void *)
#define USBDEVFS_REAPURB32 _IOW('U', 12, __u32)
#define USBDEVFS_REAPURBNDELAY _IOW('U', 13, void *)
#define USBDEVFS_REAPURBNDELAY32 _IOW('U', 13, __u32)
#define USBDEVFS_DISCSIGNAL _IOR('U', 14, struct usbdevfs_disconnectsignal)
#define USBDEVFS_DISCSIGNAL32 _IOR('U', 14, struct usbdevfs_disconnectsignal32)
#define USBDEVFS_CLAIMINTERFACE _IOR('U', 15, unsigned int)
#define USBDEVFS_RELEASEINTERFACE _IOR('U', 16, unsigned int)
#define USBDEVFS_CONNECTINFO _IOW('U', 17, struct usbdevfs_connectinfo)
#define USBDEVFS_IOCTL _IOWR('U', 18, struct usbdevfs_ioctl)
#define USBDEVFS_IOCTL32 _IOWR('U', 18, struct usbdevfs_ioctl32)
#define USBDEVFS_HUB_PORTINFO _IOR('U', 19, struct usbdevfs_hub_portinfo)
#define USBDEVFS_RESET _IO('U', 20)
#define USBDEVFS_CLEAR_HALT _IOR('U', 21, unsigned int)
#define USBDEVFS_DISCONNECT _IO('U', 22)
#define USBDEVFS_CONNECT _IO('U', 23)
#define USBDEVFS_CLAIM_PORT _IOR('U', 24, unsigned int)
#define USBDEVFS_RELEASE_PORT _IOR('U', 25, unsigned int)
#define USBDEVFS_GET_CAPABILITIES _IOR('U', 26, __u32)
#define USBDEVFS_DISCONNECT_CLAIM _IOR('U', 27, struct usbdevfs_disconnect_claim)
#define USBDEVFS_ALLOC_STREAMS _IOR('U', 28, struct usbdevfs_streams)
#define USBDEVFS_FREE_STREAMS _IOR('U', 29, struct usbdevfs_streams)
#define USBDEVFS_DROP_PRIVILEGES _IOW('U', 30, __u32)
#define USBDEVFS_GET_SPEED _IO('U', 31)
#endif /* _UAPI_LINUX_USBDEVICE_FS_H */

View File

@ -702,18 +702,20 @@ endif
ifeq ($(feature-libbfd), 1)
EXTLIBS += -lbfd
else
# we are on a system that requires -liberty and (maybe) -lz
# to link against -lbfd; test each case individually here
# call all detections now so we get correct
# status in VF output
$(call feature_check,liberty)
$(call feature_check,liberty-z)
$(call feature_check,cplus-demangle)
$(call feature_check,libbfd-liberty)
$(call feature_check,libbfd-liberty-z)
ifeq ($(feature-liberty), 1)
EXTLIBS += -liberty
ifeq ($(feature-libbfd-liberty), 1)
EXTLIBS += -lbfd -liberty
else
ifeq ($(feature-liberty-z), 1)
EXTLIBS += -liberty -lz
ifeq ($(feature-libbfd-liberty-z), 1)
EXTLIBS += -lbfd -liberty -lz
endif
endif
endif
@ -723,24 +725,24 @@ ifdef NO_DEMANGLE
else
ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
EXTLIBS += -liberty
CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
else
ifneq ($(feature-libbfd), 1)
ifneq ($(feature-liberty), 1)
ifneq ($(feature-liberty-z), 1)
# we dont have neither HAVE_CPLUS_DEMANGLE_SUPPORT
# or any of 'bfd iberty z' trinity
ifeq ($(feature-cplus-demangle), 1)
EXTLIBS += -liberty
CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
else
msg := $(warning No bfd.h/libbfd found, please install binutils-dev[el]/zlib-static/libiberty-dev to gain symbol demangling)
CFLAGS += -DNO_DEMANGLE
endif
endif
ifeq ($(filter -liberty,$(EXTLIBS)),)
$(call feature_check,cplus-demangle)
# we dont have neither HAVE_CPLUS_DEMANGLE_SUPPORT
# or any of 'bfd iberty z' trinity
ifeq ($(feature-cplus-demangle), 1)
EXTLIBS += -liberty
else
msg := $(warning No bfd.h/libbfd found, please install binutils-dev[el]/zlib-static/libiberty-dev to gain symbol demangling)
CFLAGS += -DNO_DEMANGLE
endif
endif
endif
ifneq ($(filter -liberty,$(EXTLIBS)),)
CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
endif
endif
ifneq ($(filter -lbfd,$(EXTLIBS)),)

View File

@ -497,6 +497,12 @@ prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
$(prctl_option_array): $(prctl_hdr_dir)/prctl.h $(prctl_option_tbl)
$(Q)$(SHELL) '$(prctl_option_tbl)' $(prctl_hdr_dir) > $@
usbdevfs_ioctl_array := $(beauty_ioctl_outdir)/usbdevfs_ioctl_array.c
usbdevfs_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/usbdevfs_ioctl.sh
$(usbdevfs_ioctl_array): $(linux_uapi_dir)/usbdevice_fs.h $(usbdevfs_ioctl_tbl)
$(Q)$(SHELL) '$(usbdevfs_ioctl_tbl)' $(linux_uapi_dir) > $@
x86_arch_prctl_code_array := $(beauty_outdir)/x86_arch_prctl_code_array.c
x86_arch_prctl_code_tbl := $(srctree)/tools/perf/trace/beauty/x86_arch_prctl.sh
@ -624,6 +630,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(mount_flags_array) \
$(perf_ioctl_array) \
$(prctl_option_array) \
$(usbdevfs_ioctl_array) \
$(x86_arch_prctl_code_array) \
$(rename_flags_array) \
$(arch_errno_name_array)
@ -923,6 +930,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(vhost_virtio_ioctl_array) \
$(OUTPUT)$(perf_ioctl_array) \
$(OUTPUT)$(prctl_option_array) \
$(OUTPUT)$(usbdevfs_ioctl_array) \
$(OUTPUT)$(x86_arch_prctl_code_array) \
$(OUTPUT)$(rename_flags_array) \
$(OUTPUT)$(arch_errno_name_array)

View File

@ -68,7 +68,7 @@ struct c2c_hist_entry {
struct hist_entry he;
};
static char const *coalesce_default = "pid,iaddr";
static char const *coalesce_default = "iaddr";
struct perf_c2c {
struct perf_tool tool;
@ -1878,7 +1878,7 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
return hpp_list__parse(&c2c_hists->list, output, sort);
}
#define DISPLAY_LINE_LIMIT 0.0005
#define DISPLAY_LINE_LIMIT 0.001
static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
{

View File

@ -1073,9 +1073,18 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
/*
* Print final block upto sample
*
* Due to pipeline delays the LBRs might be missing a branch
* or two, which can result in very large or negative blocks
* between final branch and sample. When this happens just
* continue walking after the last TO until we hit a branch.
*/
start = br->entries[0].to;
end = sample->ip;
if (end < start) {
/* Missing jump. Scan 128 bytes for the next branch */
end = start + 128;
}
len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true);
printed += ip__fprintf_sym(start, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
if (len <= 0) {
@ -1084,7 +1093,6 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
machine, thread, &x.is64bit, &x.cpumode, false);
if (len <= 0)
goto out;
printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", sample->ip,
dump_insn(&x, sample->ip, buffer, len, NULL));
if (PRINT_FIELD(SRCCODE))
@ -1096,6 +1104,13 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
dump_insn(&x, start + off, buffer + off, len - off, &ilen));
if (ilen == 0)
break;
if (arch_is_branch(buffer + off, len - off, x.is64bit) && start + off != sample->ip) {
/*
* Hit a missing branch. Just stop.
*/
printed += fprintf(fp, "\t... not reaching sample ...\n");
break;
}
if (PRINT_FIELD(SRCCODE))
print_srccode(thread, x.cpumode, start + off);
}
@ -1167,7 +1182,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
struct addr_location *al, FILE *fp)
{
struct perf_event_attr *attr = &evsel->attr;
size_t depth = thread_stack__depth(thread);
size_t depth = thread_stack__depth(thread, sample->cpu);
const char *name = NULL;
static int spacing;
int len = 0;
@ -1701,7 +1716,7 @@ static bool show_event(struct perf_sample *sample,
struct thread *thread,
struct addr_location *al)
{
int depth = thread_stack__depth(thread);
int depth = thread_stack__depth(thread, sample->cpu);
if (!symbol_conf.graph_function)
return true;

View File

@ -60,6 +60,7 @@
#include <linux/stringify.h>
#include <linux/time64.h>
#include <fcntl.h>
#include <sys/sysmacros.h>
#include "sane_ctype.h"
@ -112,8 +113,9 @@ struct trace {
} stats;
unsigned int max_stack;
unsigned int min_stack;
bool sort_events;
int raw_augmented_syscalls_args_size;
bool raw_augmented_syscalls;
bool sort_events;
bool not_ev_qualifier;
bool live;
bool full_time;
@ -283,12 +285,17 @@ out_delete:
return -ENOENT;
}
static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel)
static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel, struct perf_evsel *tp)
{
struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
if (evsel->priv != NULL) { /* field, sizeof_field, offsetof_field */
if (__tp_field__init_uint(&sc->id, sizeof(long), sizeof(long long), evsel->needs_swap))
if (evsel->priv != NULL) {
struct tep_format_field *syscall_id = perf_evsel__field(tp, "id");
if (syscall_id == NULL)
syscall_id = perf_evsel__field(tp, "__syscall_nr");
if (syscall_id == NULL)
goto out_delete;
if (__tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
goto out_delete;
return 0;
@ -974,9 +981,9 @@ struct thread_trace {
char *name;
} filename;
struct {
int max;
char **table;
} paths;
int max;
struct file *table;
} files;
struct intlist *syscall_stats;
};
@ -986,7 +993,7 @@ static struct thread_trace *thread_trace__new(void)
struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
if (ttrace)
ttrace->paths.max = -1;
ttrace->files.max = -1;
ttrace->syscall_stats = intlist__new(NULL);
@ -1030,30 +1037,48 @@ void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
static const size_t trace__entry_str_size = 2048;
static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
{
if (fd > ttrace->files.max) {
struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));
if (nfiles == NULL)
return NULL;
if (ttrace->files.max != -1) {
memset(nfiles + ttrace->files.max + 1, 0,
(fd - ttrace->files.max) * sizeof(struct file));
} else {
memset(nfiles, 0, (fd + 1) * sizeof(struct file));
}
ttrace->files.table = nfiles;
ttrace->files.max = fd;
}
return ttrace->files.table + fd;
}
struct file *thread__files_entry(struct thread *thread, int fd)
{
return thread_trace__files_entry(thread__priv(thread), fd);
}
static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
{
struct thread_trace *ttrace = thread__priv(thread);
struct file *file = thread_trace__files_entry(ttrace, fd);
if (fd > ttrace->paths.max) {
char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
if (npath == NULL)
return -1;
if (ttrace->paths.max != -1) {
memset(npath + ttrace->paths.max + 1, 0,
(fd - ttrace->paths.max) * sizeof(char *));
} else {
memset(npath, 0, (fd + 1) * sizeof(char *));
}
ttrace->paths.table = npath;
ttrace->paths.max = fd;
if (file != NULL) {
struct stat st;
if (stat(pathname, &st) == 0)
file->dev_maj = major(st.st_rdev);
file->pathname = strdup(pathname);
if (file->pathname)
return 0;
}
ttrace->paths.table[fd] = strdup(pathname);
return ttrace->paths.table[fd] != NULL ? 0 : -1;
return -1;
}
static int thread__read_fd_path(struct thread *thread, int fd)
@ -1093,7 +1118,7 @@ static const char *thread__fd_path(struct thread *thread, int fd,
if (fd < 0)
return NULL;
if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
if ((fd > ttrace->files.max || ttrace->files.table[fd].pathname == NULL)) {
if (!trace->live)
return NULL;
++trace->stats.proc_getname;
@ -1101,7 +1126,7 @@ static const char *thread__fd_path(struct thread *thread, int fd,
return NULL;
}
return ttrace->paths.table[fd];
return ttrace->files.table[fd].pathname;
}
size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
@ -1140,8 +1165,8 @@ static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
struct thread_trace *ttrace = thread__priv(arg->thread);
if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
zfree(&ttrace->paths.table[fd]);
if (ttrace && fd >= 0 && fd <= ttrace->files.max)
zfree(&ttrace->files.table[fd].pathname);
return printed;
}
@ -1768,16 +1793,16 @@ static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
return printed;
}
static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, bool raw_augmented)
static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
{
void *augmented_args = NULL;
/*
* For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
* and there we get all 6 syscall args plus the tracepoint common
* fields (sizeof(long)) and the syscall_nr (another long). So we check
* if that is the case and if so don't look after the sc->args_size,
* but always after the full raw_syscalls:sys_enter payload, which is
* fixed.
* and there we get all 6 syscall args plus the tracepoint common fields
* that gets calculated at the start and the syscall_nr (another long).
* So we check if that is the case and if so don't look after the
* sc->args_size but always after the full raw_syscalls:sys_enter payload,
* which is fixed.
*
* We'll revisit this later to pass s->args_size to the BPF augmenter
* (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
@ -1785,7 +1810,7 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam
* use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace
* traffic to just what is needed for each syscall.
*/
int args_size = raw_augmented ? (8 * (int)sizeof(long)) : sc->args_size;
int args_size = raw_augmented_args_size ?: sc->args_size;
*augmented_args_size = sample->raw_size - args_size;
if (*augmented_args_size > 0)
@ -1839,7 +1864,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
* here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
*/
if (evsel != trace->syscalls.events.sys_enter)
augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
ttrace->entry_time = sample->time;
msg = ttrace->entry_str;
printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
@ -1897,7 +1922,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse
goto out_put;
args = perf_evsel__sc_tp_ptr(evsel, args, sample);
augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
fprintf(trace->output, "%s", msg);
err = 0;
@ -2686,7 +2711,9 @@ static int trace__set_ev_qualifier_filter(struct trace *trace)
{
if (trace->syscalls.map)
return trace__set_ev_qualifier_bpf_filter(trace);
return trace__set_ev_qualifier_tp_filter(trace);
if (trace->syscalls.events.sys_enter)
return trace__set_ev_qualifier_tp_filter(trace);
return 0;
}
static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused,
@ -3812,13 +3839,6 @@ int cmd_trace(int argc, const char **argv)
* syscall.
*/
if (trace.syscalls.events.augmented) {
evsel = trace.syscalls.events.augmented;
if (perf_evsel__init_augmented_syscall_tp(evsel) ||
perf_evsel__init_augmented_syscall_tp_args(evsel))
goto out;
evsel->handler = trace__sys_enter;
evlist__for_each_entry(trace.evlist, evsel) {
bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
@ -3827,9 +3847,41 @@ int cmd_trace(int argc, const char **argv)
goto init_augmented_syscall_tp;
}
if (strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_enter") == 0) {
struct perf_evsel *augmented = trace.syscalls.events.augmented;
if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) ||
perf_evsel__init_augmented_syscall_tp_args(augmented))
goto out;
augmented->handler = trace__sys_enter;
}
if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
struct syscall_tp *sc;
init_augmented_syscall_tp:
perf_evsel__init_augmented_syscall_tp(evsel);
if (perf_evsel__init_augmented_syscall_tp(evsel, evsel))
goto out;
sc = evsel->priv;
/*
* For now with BPF raw_augmented we hook into
* raw_syscalls:sys_enter and there we get all
* 6 syscall args plus the tracepoint common
* fields and the syscall_nr (another long).
* So we check if that is the case and if so
* don't look after the sc->args_size but
* always after the full raw_syscalls:sys_enter
* payload, which is fixed.
*
* We'll revisit this later to pass
* s->args_size to the BPF augmenter (now
* tools/perf/examples/bpf/augmented_raw_syscalls.c,
* so that it copies only what we need for each
* syscall, like what happens when we use
* syscalls:sys_enter_NAME, so that we reduce
* the kernel/userspace traffic to just what is
* needed for each syscall.
*/
if (trace.raw_augmented_syscalls)
trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
perf_evsel__init_augmented_syscall_tp_ret(evsel);
evsel->handler = trace__sys_exit;
}

View File

@ -14,6 +14,7 @@ include/uapi/linux/perf_event.h
include/uapi/linux/prctl.h
include/uapi/linux/sched.h
include/uapi/linux/stat.h
include/uapi/linux/usbdevice_fs.h
include/uapi/linux/vhost.h
include/uapi/sound/asound.h
include/linux/bits.h

View File

@ -32,6 +32,13 @@ size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, boo
struct trace;
struct thread;
struct file {
char *pathname;
int dev_maj;
};
struct file *thread__files_entry(struct thread *thread, int fd);
struct strarrays {
int nr_entries;
struct strarray **entries;

View File

@ -112,6 +112,17 @@ static size_t ioctl__scnprintf_perf_cmd(int nr, int dir, char *bf, size_t size)
return scnprintf(bf, size, "(%#x, %#x, %#x)", 0xAE, nr, dir);
}
static size_t ioctl__scnprintf_usbdevfs_cmd(int nr, int dir, char *bf, size_t size)
{
#include "trace/beauty/generated/ioctl/usbdevfs_ioctl_array.c"
static DEFINE_STRARRAY(usbdevfs_ioctl_cmds, "");
if (nr < strarray__usbdevfs_ioctl_cmds.nr_entries && strarray__usbdevfs_ioctl_cmds.entries[nr] != NULL)
return scnprintf(bf, size, "USBDEVFS_%s", strarray__usbdevfs_ioctl_cmds.entries[nr]);
return scnprintf(bf, size, "(%c, %#x, %#x)", 'U', nr, dir);
}
static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size, bool show_prefix)
{
const char *prefix = "_IOC_";
@ -157,9 +168,20 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size, boo
return printed + scnprintf(bf + printed, size - printed, ", %#x, %#x, %#x)", type, nr, sz);
}
#ifndef USB_DEVICE_MAJOR
#define USB_DEVICE_MAJOR 189
#endif // USB_DEVICE_MAJOR
size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg)
{
unsigned long cmd = arg->val;
unsigned int fd = syscall_arg__val(arg, 0);
struct file *file = thread__files_entry(arg->thread, fd);
if (file != NULL) {
if (file->dev_maj == USB_DEVICE_MAJOR)
return ioctl__scnprintf_usbdevfs_cmd(_IOC_NR(cmd), _IOC_DIR(cmd), bf, size);
}
return ioctl__scnprintf_cmd(cmd, bf, size, arg->show_string_prefix);
}

View File

@ -18,8 +18,8 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
}
P_MMAP_PROT(READ);
P_MMAP_PROT(EXEC);
P_MMAP_PROT(WRITE);
P_MMAP_PROT(EXEC);
P_MMAP_PROT(SEM);
P_MMAP_PROT(GROWSDOWN);
P_MMAP_PROT(GROWSUP);

View File

@ -9,7 +9,7 @@
static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg)
{
bool show_prefix = arg->show_string_prefix;
const char *prefix = "SECOMP_SET_MODE_";
const char *prefix = "SECCOMP_SET_MODE_";
int op = arg->val;
size_t printed = 0;
@ -34,7 +34,7 @@ static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
bool show_prefix = arg->show_string_prefix;
const char *prefix = "SECOMP_FILTER_FLAG_";
const char *prefix = "SECCOMP_FILTER_FLAG_";
int printed = 0, flags = arg->val;
#define P_FLAG(n) \

View File

@ -0,0 +1,19 @@
#!/bin/sh
# SPDX-License-Identifier: LGPL-2.1
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
printf "static const char *usbdevfs_ioctl_cmds[] = {\n"
regex="^#[[:space:]]*define[[:space:]]+USBDEVFS_(\w+)[[:space:]]+_IO[WR]{0,2}\([[:space:]]*'U'[[:space:]]*,[[:space:]]*([[:digit:]]+).*"
egrep $regex ${header_dir}/usbdevice_fs.h | egrep -v 'USBDEVFS_\w+32[[:space:]]' | \
sed -r "s/$regex/\2 \1/g" | \
sort | xargs printf "\t[%s] = \"%s\",\n"
printf "};\n\n"
printf "#if 0\n"
printf "static const char *usbdevfs_ioctl_32_cmds[] = {\n"
regex="^#[[:space:]]*define[[:space:]]+USBDEVFS_(\w+)[[:space:]]+_IO[WR]{0,2}\([[:space:]]*'U'[[:space:]]*,[[:space:]]*([[:digit:]]+).*"
egrep $regex ${header_dir}/usbdevice_fs.h | egrep 'USBDEVFS_\w+32[[:space:]]' | \
sed -r "s/$regex/\2 \1/g" | \
sort | xargs printf "\t[%s] = \"%s\",\n"
printf "};\n"
printf "#endif\n"

View File

@ -13,3 +13,11 @@ const char *dump_insn(struct perf_insn *x __maybe_unused,
*lenp = 0;
return "?";
}
__weak
int arch_is_branch(const unsigned char *buf __maybe_unused,
size_t len __maybe_unused,
int x86_64 __maybe_unused)
{
return 0;
}

View File

@ -20,4 +20,6 @@ struct perf_insn {
const char *dump_insn(struct perf_insn *x, u64 ip,
u8 *inbuf, int inlen, int *lenp);
int arch_is_branch(const unsigned char *buf, size_t len, int x86_64);
#endif

View File

@ -451,7 +451,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
continue;
intel_bts_get_branch_type(btsq, branch);
if (btsq->bts->synth_opts.thread_stack)
thread_stack__event(thread, btsq->sample_flags,
thread_stack__event(thread, btsq->cpu, btsq->sample_flags,
le64_to_cpu(branch->from),
le64_to_cpu(branch->to),
btsq->intel_pt_insn.length,
@ -523,7 +523,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
!btsq->bts->synth_opts.thread_stack && thread &&
(!old_buffer || btsq->bts->sampling_mode ||
(btsq->bts->snapshot_mode && !buffer->consecutive)))
thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
thread_stack__set_trace_nr(thread, btsq->cpu, buffer->buffer_nr + 1);
err = intel_bts_process_buffer(btsq, buffer, thread);

View File

@ -180,6 +180,14 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
return 0;
}
int arch_is_branch(const unsigned char *buf, size_t len, int x86_64)
{
struct intel_pt_insn in;
if (intel_pt_get_insn(buf, len, x86_64, &in) < 0)
return -1;
return in.branch != INTEL_PT_BR_NO_BRANCH;
}
const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
u8 *inbuf, int inlen, int *lenp)
{

View File

@ -1174,7 +1174,7 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
intel_pt_prep_b_sample(pt, ptq, event, sample);
if (pt->synth_opts.callchain) {
thread_stack__sample(ptq->thread, ptq->chain,
thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
pt->synth_opts.callchain_sz + 1,
sample->ip, pt->kernel_start);
sample->callchain = ptq->chain;
@ -1526,11 +1526,11 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
return 0;
if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
state->to_ip, ptq->insn_len,
state->trace_nr);
else
thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
if (pt->sample_branches) {
err = intel_pt_synth_branch_sample(ptq);

View File

@ -939,7 +939,8 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
file = PyFile_FromFile(fp, "perf", "r", NULL);
#else
file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 1);
file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1,
NULL, NULL, NULL, 0);
#endif
if (file == NULL)
goto free_list;

View File

@ -1527,6 +1527,13 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
return machine__findnew_thread(&session->machines.host, -1, pid);
}
/*
* Threads are identified by pid and tid, and the idle task has pid == tid == 0.
* So here a single thread is created for that, but actually there is a separate
* idle task per cpu, so there should be one 'struct thread' per cpu, but there
* is only 1. That causes problems for some tools, requiring workarounds. For
* example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu().
*/
int perf_session__register_idle_thread(struct perf_session *session)
{
struct thread *thread;

View File

@ -15,6 +15,7 @@
#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
@ -60,6 +61,7 @@ struct thread_stack_entry {
* @last_time: last timestamp
* @crp: call/return processor
* @comm: current comm
* @arr_sz: size of array if this is the first element of an array
*/
struct thread_stack {
struct thread_stack_entry *stack;
@ -71,8 +73,19 @@ struct thread_stack {
u64 last_time;
struct call_return_processor *crp;
struct comm *comm;
unsigned int arr_sz;
};
/*
* Assume pid == tid == 0 identifies the idle task as defined by
* perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
* and therefore requires a stack for each cpu.
*/
static inline bool thread_stack__per_cpu(struct thread *thread)
{
return !(thread->tid || thread->pid_);
}
static int thread_stack__grow(struct thread_stack *ts)
{
struct thread_stack_entry *new_stack;
@ -91,19 +104,14 @@ static int thread_stack__grow(struct thread_stack *ts)
return 0;
}
static struct thread_stack *thread_stack__new(struct thread *thread,
struct call_return_processor *crp)
static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
struct call_return_processor *crp)
{
struct thread_stack *ts;
int err;
ts = zalloc(sizeof(struct thread_stack));
if (!ts)
return NULL;
if (thread_stack__grow(ts)) {
free(ts);
return NULL;
}
err = thread_stack__grow(ts);
if (err)
return err;
if (thread->mg && thread->mg->machine)
ts->kernel_start = machine__kernel_start(thread->mg->machine);
@ -111,9 +119,72 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
ts->kernel_start = 1ULL << 63;
ts->crp = crp;
return 0;
}
static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
struct call_return_processor *crp)
{
struct thread_stack *ts = thread->ts, *new_ts;
unsigned int old_sz = ts ? ts->arr_sz : 0;
unsigned int new_sz = 1;
if (thread_stack__per_cpu(thread) && cpu > 0)
new_sz = roundup_pow_of_two(cpu + 1);
if (!ts || new_sz > old_sz) {
new_ts = calloc(new_sz, sizeof(*ts));
if (!new_ts)
return NULL;
if (ts)
memcpy(new_ts, ts, old_sz * sizeof(*ts));
new_ts->arr_sz = new_sz;
zfree(&thread->ts);
thread->ts = new_ts;
ts = new_ts;
}
if (thread_stack__per_cpu(thread) && cpu > 0 &&
(unsigned int)cpu < ts->arr_sz)
ts += cpu;
if (!ts->stack &&
thread_stack__init(ts, thread, crp))
return NULL;
return ts;
}
static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
{
struct thread_stack *ts = thread->ts;
if (cpu < 0)
cpu = 0;
if (!ts || (unsigned int)cpu >= ts->arr_sz)
return NULL;
ts += cpu;
if (!ts->stack)
return NULL;
return ts;
}
static inline struct thread_stack *thread__stack(struct thread *thread,
int cpu)
{
if (!thread)
return NULL;
if (thread_stack__per_cpu(thread))
return thread__cpu_stack(thread, cpu);
return thread->ts;
}
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
bool trace_end)
{
@ -226,25 +297,37 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
int thread_stack__flush(struct thread *thread)
{
if (thread->ts)
return __thread_stack__flush(thread, thread->ts);
struct thread_stack *ts = thread->ts;
unsigned int pos;
int err = 0;
return 0;
if (ts) {
for (pos = 0; pos < ts->arr_sz; pos++) {
int ret = __thread_stack__flush(thread, ts + pos);
if (ret)
err = ret;
}
}
return err;
}
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
u64 to_ip, u16 insn_len, u64 trace_nr)
{
struct thread_stack *ts = thread__stack(thread, cpu);
if (!thread)
return -EINVAL;
if (!thread->ts) {
thread->ts = thread_stack__new(thread, NULL);
if (!thread->ts) {
if (!ts) {
ts = thread_stack__new(thread, cpu, NULL);
if (!ts) {
pr_warning("Out of memory: no thread stack\n");
return -ENOMEM;
}
thread->ts->trace_nr = trace_nr;
ts->trace_nr = trace_nr;
}
/*
@ -252,14 +335,14 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
* the stack might be completely invalid. Better to report nothing than
* to report something misleading, so flush the stack.
*/
if (trace_nr != thread->ts->trace_nr) {
if (thread->ts->trace_nr)
__thread_stack__flush(thread, thread->ts);
thread->ts->trace_nr = trace_nr;
if (trace_nr != ts->trace_nr) {
if (ts->trace_nr)
__thread_stack__flush(thread, ts);
ts->trace_nr = trace_nr;
}
/* Stop here if thread_stack__process() is in use */
if (thread->ts->crp)
if (ts->crp)
return 0;
if (flags & PERF_IP_FLAG_CALL) {
@ -270,7 +353,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
ret_addr = from_ip + insn_len;
if (ret_addr == to_ip)
return 0; /* Zero-length calls are excluded */
return thread_stack__push(thread->ts, ret_addr,
return thread_stack__push(ts, ret_addr,
flags & PERF_IP_FLAG_TRACE_END);
} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
/*
@ -280,32 +363,52 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
* address, so try to pop that. Also, do not expect a call made
* when the trace ended, to return, so pop that.
*/
thread_stack__pop(thread->ts, to_ip);
thread_stack__pop_trace_end(thread->ts);
thread_stack__pop(ts, to_ip);
thread_stack__pop_trace_end(ts);
} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
thread_stack__pop(thread->ts, to_ip);
thread_stack__pop(ts, to_ip);
}
return 0;
}
void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
if (!thread || !thread->ts)
struct thread_stack *ts = thread__stack(thread, cpu);
if (!ts)
return;
if (trace_nr != thread->ts->trace_nr) {
if (thread->ts->trace_nr)
__thread_stack__flush(thread, thread->ts);
thread->ts->trace_nr = trace_nr;
if (trace_nr != ts->trace_nr) {
if (ts->trace_nr)
__thread_stack__flush(thread, ts);
ts->trace_nr = trace_nr;
}
}
static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
__thread_stack__flush(thread, ts);
zfree(&ts->stack);
}
static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
{
unsigned int arr_sz = ts->arr_sz;
__thread_stack__free(thread, ts);
memset(ts, 0, sizeof(*ts));
ts->arr_sz = arr_sz;
}
void thread_stack__free(struct thread *thread)
{
if (thread->ts) {
__thread_stack__flush(thread, thread->ts);
zfree(&thread->ts->stack);
struct thread_stack *ts = thread->ts;
unsigned int pos;
if (ts) {
for (pos = 0; pos < ts->arr_sz; pos++)
__thread_stack__free(thread, ts + pos);
zfree(&thread->ts);
}
}
@ -315,9 +418,11 @@ static inline u64 callchain_context(u64 ip, u64 kernel_start)
return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}
void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
void thread_stack__sample(struct thread *thread, int cpu,
struct ip_callchain *chain,
size_t sz, u64 ip, u64 kernel_start)
{
struct thread_stack *ts = thread__stack(thread, cpu);
u64 context = callchain_context(ip, kernel_start);
u64 last_context;
size_t i, j;
@ -330,15 +435,15 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
chain->ips[0] = context;
chain->ips[1] = ip;
if (!thread || !thread->ts) {
if (!ts) {
chain->nr = 2;
return;
}
last_context = context;
for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
ip = ts->stack[ts->cnt - j].ret_addr;
context = callchain_context(ip, kernel_start);
if (context != last_context) {
if (i >= sz - 1)
@ -449,7 +554,7 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
return 1;
}
static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
static int thread_stack__bottom(struct thread_stack *ts,
struct perf_sample *sample,
struct addr_location *from_al,
struct addr_location *to_al, u64 ref)
@ -474,7 +579,7 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
if (!cp)
return -ENOMEM;
return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
true, false);
}
@ -590,24 +695,19 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
struct addr_location *to_al, u64 ref,
struct call_return_processor *crp)
{
struct thread_stack *ts = thread->ts;
struct thread_stack *ts = thread__stack(thread, sample->cpu);
int err = 0;
if (ts) {
if (!ts->crp) {
/* Supersede thread_stack__event() */
thread_stack__free(thread);
thread->ts = thread_stack__new(thread, crp);
if (!thread->ts)
return -ENOMEM;
ts = thread->ts;
ts->comm = comm;
}
} else {
thread->ts = thread_stack__new(thread, crp);
if (!thread->ts)
if (ts && !ts->crp) {
/* Supersede thread_stack__event() */
thread_stack__reset(thread, ts);
ts = NULL;
}
if (!ts) {
ts = thread_stack__new(thread, sample->cpu, crp);
if (!ts)
return -ENOMEM;
ts = thread->ts;
ts->comm = comm;
}
@ -621,8 +721,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
/* If the stack is empty, put the current symbol on the stack */
if (!ts->cnt) {
err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
ref);
err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
if (err)
return err;
}
@ -671,9 +770,11 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
return err;
}
size_t thread_stack__depth(struct thread *thread)
size_t thread_stack__depth(struct thread *thread, int cpu)
{
if (!thread->ts)
struct thread_stack *ts = thread__stack(thread, cpu);
if (!ts)
return 0;
return thread->ts->cnt;
return ts->cnt;
}

View File

@ -80,14 +80,14 @@ struct call_return_processor {
void *data;
};
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
u64 to_ip, u16 insn_len, u64 trace_nr);
void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
size_t sz, u64 ip, u64 kernel_start);
int thread_stack__flush(struct thread *thread);
void thread_stack__free(struct thread *thread);
size_t thread_stack__depth(struct thread *thread);
size_t thread_stack__depth(struct thread *thread, int cpu);
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),

View File

@ -9,13 +9,13 @@ ifeq ("$(origin O)", "command line")
endif
turbostat : turbostat.c
CFLAGS += -Wall
CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
override CFLAGS += -Wall
override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
%: %.c
@mkdir -p $(BUILD_OUTPUT)
$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@
$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS)
.PHONY : clean
clean :

View File

@ -9,12 +9,12 @@ ifeq ("$(origin O)", "command line")
endif
x86_energy_perf_policy : x86_energy_perf_policy.c
CFLAGS += -Wall
CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
override CFLAGS += -Wall
override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
%: %.c
@mkdir -p $(BUILD_OUTPUT)
$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@
$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS)
.PHONY : clean
clean :

View File

@ -6,13 +6,13 @@ VERSION = 1.0
BINDIR=usr/bin
WARNFLAGS=-Wall -Wshadow -W -Wformat -Wimplicit-function-declaration -Wimplicit-int
CFLAGS+= -O1 ${WARNFLAGS}
override CFLAGS+= -O1 ${WARNFLAGS}
# Add "-fstack-protector" only if toolchain supports it.
CFLAGS+= $(call cc-option,-fstack-protector)
override CFLAGS+= $(call cc-option,-fstack-protector-strong)
CC?= $(CROSS_COMPILE)gcc
PKG_CONFIG?= pkg-config
CFLAGS+=-D VERSION=\"$(VERSION)\"
override CFLAGS+=-D VERSION=\"$(VERSION)\"
LDFLAGS+=
TARGET=tmon
@ -29,7 +29,7 @@ TMON_LIBS += $(shell $(PKG_CONFIG) --libs $(STATIC) panelw ncursesw 2> /dev/null
$(PKG_CONFIG) --libs $(STATIC) panel ncurses 2> /dev/null || \
echo -lpanel -lncurses)
CFLAGS += $(shell $(PKG_CONFIG) --cflags $(STATIC) panelw ncursesw 2> /dev/null || \
override CFLAGS += $(shell $(PKG_CONFIG) --cflags $(STATIC) panelw ncursesw 2> /dev/null || \
$(PKG_CONFIG) --cflags $(STATIC) panel ncurses 2> /dev/null)
OBJS = tmon.o tui.o sysfs.o pid.o