linux/tools/perf/util/namespaces.c

290 lines
5.6 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
perf tools: Add PERF_RECORD_NAMESPACES to include namespaces related info Introduce a new option to record PERF_RECORD_NAMESPACES events emitted by the kernel when fork, clone, setns or unshare are invoked. And update perf-record documentation with the new option to record namespace events. Committer notes: Combined it with a later patch to allow printing it via 'perf report -D' and be able to test the feature introduced in this patch. Had to move here also perf_ns__name(), that was introduced in another later patch. Also used PRIu64 and PRIx64 to fix the build in some enfironments wrt: util/event.c:1129:39: error: format '%lx' expects argument of type 'long unsigned int', but argument 6 has type 'long long unsigned int' [-Werror=format=] ret += fprintf(fp, "%u/%s: %lu/0x%lx%s", idx ^ Testing it: # perf record --namespaces -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.083 MB perf.data (423 samples) ] # # perf report -D <SNIP> 3 2028902078892 0x115140 [0xa0]: PERF_RECORD_NAMESPACES 14783/14783 - nr_namespaces: 7 [0/net: 3/0xf0000081, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc, 4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb] 0x1151e0 [0x30]: event: 9 . . ... raw event: size 48 bytes . 0000: 09 00 00 00 02 00 30 00 c4 71 82 68 0c 7f 00 00 ......0..q.h.... . 0010: a9 39 00 00 a9 39 00 00 94 28 fe 63 d8 01 00 00 .9...9...(.c.... . 0020: 03 00 00 00 00 00 00 00 ce c4 02 00 00 00 00 00 ................ <SNIP> NAMESPACES events: 1 <SNIP> # Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Alexei Starovoitov <ast@fb.com> Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com> Cc: Aravinda Prasad <aravinda@linux.vnet.ibm.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: Eric Biederman <ebiederm@xmission.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Sargun Dhillon <sargun@sargun.me> Cc: Steven Rostedt <rostedt@goodmis.org> Link: http://lkml.kernel.org/r/148891930386.25309.18412039920746995488.stgit@hbathini.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-03-08 04:41:43 +08:00
/*
*
* Copyright (C) 2017 Hari Bathini, IBM Corporation
*/
#include "namespaces.h"
#include "event.h"
#include "get_current_dir_name.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <limits.h>
#include <sched.h>
perf tools: Add PERF_RECORD_NAMESPACES to include namespaces related info Introduce a new option to record PERF_RECORD_NAMESPACES events emitted by the kernel when fork, clone, setns or unshare are invoked. And update perf-record documentation with the new option to record namespace events. Committer notes: Combined it with a later patch to allow printing it via 'perf report -D' and be able to test the feature introduced in this patch. Had to move here also perf_ns__name(), that was introduced in another later patch. Also used PRIu64 and PRIx64 to fix the build in some enfironments wrt: util/event.c:1129:39: error: format '%lx' expects argument of type 'long unsigned int', but argument 6 has type 'long long unsigned int' [-Werror=format=] ret += fprintf(fp, "%u/%s: %lu/0x%lx%s", idx ^ Testing it: # perf record --namespaces -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.083 MB perf.data (423 samples) ] # # perf report -D <SNIP> 3 2028902078892 0x115140 [0xa0]: PERF_RECORD_NAMESPACES 14783/14783 - nr_namespaces: 7 [0/net: 3/0xf0000081, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc, 4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb] 0x1151e0 [0x30]: event: 9 . . ... raw event: size 48 bytes . 0000: 09 00 00 00 02 00 30 00 c4 71 82 68 0c 7f 00 00 ......0..q.h.... . 0010: a9 39 00 00 a9 39 00 00 94 28 fe 63 d8 01 00 00 .9...9...(.c.... . 0020: 03 00 00 00 00 00 00 00 ce c4 02 00 00 00 00 00 ................ <SNIP> NAMESPACES events: 1 <SNIP> # Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Alexei Starovoitov <ast@fb.com> Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com> Cc: Aravinda Prasad <aravinda@linux.vnet.ibm.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: Eric Biederman <ebiederm@xmission.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Sargun Dhillon <sargun@sargun.me> Cc: Steven Rostedt <rostedt@goodmis.org> Link: http://lkml.kernel.org/r/148891930386.25309.18412039920746995488.stgit@hbathini.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-03-08 04:41:43 +08:00
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <asm/bug.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
perf tools: Add PERF_RECORD_NAMESPACES to include namespaces related info Introduce a new option to record PERF_RECORD_NAMESPACES events emitted by the kernel when fork, clone, setns or unshare are invoked. And update perf-record documentation with the new option to record namespace events. Committer notes: Combined it with a later patch to allow printing it via 'perf report -D' and be able to test the feature introduced in this patch. Had to move here also perf_ns__name(), that was introduced in another later patch. Also used PRIu64 and PRIx64 to fix the build in some enfironments wrt: util/event.c:1129:39: error: format '%lx' expects argument of type 'long unsigned int', but argument 6 has type 'long long unsigned int' [-Werror=format=] ret += fprintf(fp, "%u/%s: %lu/0x%lx%s", idx ^ Testing it: # perf record --namespaces -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.083 MB perf.data (423 samples) ] # # perf report -D <SNIP> 3 2028902078892 0x115140 [0xa0]: PERF_RECORD_NAMESPACES 14783/14783 - nr_namespaces: 7 [0/net: 3/0xf0000081, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc, 4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb] 0x1151e0 [0x30]: event: 9 . . ... raw event: size 48 bytes . 0000: 09 00 00 00 02 00 30 00 c4 71 82 68 0c 7f 00 00 ......0..q.h.... . 0010: a9 39 00 00 a9 39 00 00 94 28 fe 63 d8 01 00 00 .9...9...(.c.... . 0020: 03 00 00 00 00 00 00 00 ce c4 02 00 00 00 00 00 ................ <SNIP> NAMESPACES events: 1 <SNIP> # Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Alexei Starovoitov <ast@fb.com> Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com> Cc: Aravinda Prasad <aravinda@linux.vnet.ibm.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: Eric Biederman <ebiederm@xmission.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Sargun Dhillon <sargun@sargun.me> Cc: Steven Rostedt <rostedt@goodmis.org> Link: http://lkml.kernel.org/r/148891930386.25309.18412039920746995488.stgit@hbathini.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-03-08 04:41:43 +08:00
static const char *perf_ns__names[] = {
[NET_NS_INDEX] = "net",
[UTS_NS_INDEX] = "uts",
[IPC_NS_INDEX] = "ipc",
[PID_NS_INDEX] = "pid",
[USER_NS_INDEX] = "user",
[MNT_NS_INDEX] = "mnt",
[CGROUP_NS_INDEX] = "cgroup",
};
const char *perf_ns__name(unsigned int id)
{
if (id >= ARRAY_SIZE(perf_ns__names))
return "UNKNOWN";
return perf_ns__names[id];
}
struct namespaces *namespaces__new(struct perf_record_namespaces *event)
perf tools: Add PERF_RECORD_NAMESPACES to include namespaces related info Introduce a new option to record PERF_RECORD_NAMESPACES events emitted by the kernel when fork, clone, setns or unshare are invoked. And update perf-record documentation with the new option to record namespace events. Committer notes: Combined it with a later patch to allow printing it via 'perf report -D' and be able to test the feature introduced in this patch. Had to move here also perf_ns__name(), that was introduced in another later patch. Also used PRIu64 and PRIx64 to fix the build in some enfironments wrt: util/event.c:1129:39: error: format '%lx' expects argument of type 'long unsigned int', but argument 6 has type 'long long unsigned int' [-Werror=format=] ret += fprintf(fp, "%u/%s: %lu/0x%lx%s", idx ^ Testing it: # perf record --namespaces -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.083 MB perf.data (423 samples) ] # # perf report -D <SNIP> 3 2028902078892 0x115140 [0xa0]: PERF_RECORD_NAMESPACES 14783/14783 - nr_namespaces: 7 [0/net: 3/0xf0000081, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc, 4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb] 0x1151e0 [0x30]: event: 9 . . ... raw event: size 48 bytes . 0000: 09 00 00 00 02 00 30 00 c4 71 82 68 0c 7f 00 00 ......0..q.h.... . 0010: a9 39 00 00 a9 39 00 00 94 28 fe 63 d8 01 00 00 .9...9...(.c.... . 0020: 03 00 00 00 00 00 00 00 ce c4 02 00 00 00 00 00 ................ <SNIP> NAMESPACES events: 1 <SNIP> # Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Alexei Starovoitov <ast@fb.com> Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com> Cc: Aravinda Prasad <aravinda@linux.vnet.ibm.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: Eric Biederman <ebiederm@xmission.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Sargun Dhillon <sargun@sargun.me> Cc: Steven Rostedt <rostedt@goodmis.org> Link: http://lkml.kernel.org/r/148891930386.25309.18412039920746995488.stgit@hbathini.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-03-08 04:41:43 +08:00
{
struct namespaces *namespaces;
u64 link_info_size = ((event ? event->nr_namespaces : NR_NAMESPACES) *
sizeof(struct perf_ns_link_info));
namespaces = zalloc(sizeof(struct namespaces) + link_info_size);
if (!namespaces)
return NULL;
namespaces->end_time = -1;
if (event)
memcpy(namespaces->link_info, event->link_info, link_info_size);
return namespaces;
}
void namespaces__free(struct namespaces *namespaces)
{
free(namespaces);
}
int nsinfo__init(struct nsinfo *nsi)
{
char oldns[PATH_MAX];
char spath[PATH_MAX];
char *newns = NULL;
char *statln = NULL;
char *nspid;
struct stat old_stat;
struct stat new_stat;
FILE *f = NULL;
size_t linesz = 0;
int rv = -1;
if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
return rv;
if (asprintf(&newns, "/proc/%d/ns/mnt", nsi->pid) == -1)
return rv;
if (stat(oldns, &old_stat) < 0)
goto out;
if (stat(newns, &new_stat) < 0)
goto out;
/* Check if the mount namespaces differ, if so then indicate that we
* want to switch as part of looking up dso/map data.
*/
if (old_stat.st_ino != new_stat.st_ino) {
nsi->need_setns = true;
nsi->mntns_path = newns;
newns = NULL;
}
/* If we're dealing with a process that is in a different PID namespace,
* attempt to work out the innermost tgid for the process.
*/
if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsi->pid) >= PATH_MAX)
goto out;
f = fopen(spath, "r");
if (f == NULL)
goto out;
while (getline(&statln, &linesz, f) != -1) {
/* Use tgid if CONFIG_PID_NS is not defined. */
if (strstr(statln, "Tgid:") != NULL) {
nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'),
NULL, 10);
nsi->nstgid = nsi->tgid;
}
if (strstr(statln, "NStgid:") != NULL) {
nspid = strrchr(statln, '\t');
nsi->nstgid = (pid_t)strtol(nspid, NULL, 10);
/* If innermost tgid is not the first, process is in a different
* PID namespace.
*/
nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid;
break;
}
}
rv = 0;
out:
if (f != NULL)
(void) fclose(f);
free(statln);
free(newns);
return rv;
}
struct nsinfo *nsinfo__new(pid_t pid)
{
struct nsinfo *nsi;
if (pid == 0)
return NULL;
nsi = calloc(1, sizeof(*nsi));
if (nsi != NULL) {
nsi->pid = pid;
nsi->tgid = pid;
nsi->nstgid = pid;
nsi->need_setns = false;
nsi->in_pidns = false;
/* Init may fail if the process exits while we're trying to look
* at its proc information. In that case, save the pid but
* don't try to enter the namespace.
*/
if (nsinfo__init(nsi) == -1)
nsi->need_setns = false;
refcount_set(&nsi->refcnt, 1);
}
return nsi;
}
struct nsinfo *nsinfo__copy(struct nsinfo *nsi)
{
struct nsinfo *nnsi;
if (nsi == NULL)
return NULL;
nnsi = calloc(1, sizeof(*nnsi));
if (nnsi != NULL) {
nnsi->pid = nsi->pid;
nnsi->tgid = nsi->tgid;
nnsi->nstgid = nsi->nstgid;
nnsi->need_setns = nsi->need_setns;
nnsi->in_pidns = nsi->in_pidns;
if (nsi->mntns_path) {
nnsi->mntns_path = strdup(nsi->mntns_path);
if (!nnsi->mntns_path) {
free(nnsi);
return NULL;
}
}
refcount_set(&nnsi->refcnt, 1);
}
return nnsi;
}
void nsinfo__delete(struct nsinfo *nsi)
{
zfree(&nsi->mntns_path);
free(nsi);
}
struct nsinfo *nsinfo__get(struct nsinfo *nsi)
{
if (nsi)
refcount_inc(&nsi->refcnt);
return nsi;
}
void nsinfo__put(struct nsinfo *nsi)
{
if (nsi && refcount_dec_and_test(&nsi->refcnt))
nsinfo__delete(nsi);
}
void nsinfo__mountns_enter(struct nsinfo *nsi,
struct nscookie *nc)
{
char curpath[PATH_MAX];
int oldns = -1;
int newns = -1;
char *oldcwd = NULL;
if (nc == NULL)
return;
nc->oldns = -1;
nc->newns = -1;
if (!nsi || !nsi->need_setns)
return;
if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
return;
oldcwd = get_current_dir_name();
if (!oldcwd)
return;
oldns = open(curpath, O_RDONLY);
if (oldns < 0)
goto errout;
newns = open(nsi->mntns_path, O_RDONLY);
if (newns < 0)
goto errout;
if (setns(newns, CLONE_NEWNS) < 0)
goto errout;
nc->oldcwd = oldcwd;
nc->oldns = oldns;
nc->newns = newns;
return;
errout:
free(oldcwd);
if (oldns > -1)
close(oldns);
if (newns > -1)
close(newns);
}
void nsinfo__mountns_exit(struct nscookie *nc)
{
if (nc == NULL || nc->oldns == -1 || nc->newns == -1 || !nc->oldcwd)
return;
setns(nc->oldns, CLONE_NEWNS);
if (nc->oldcwd) {
WARN_ON_ONCE(chdir(nc->oldcwd));
zfree(&nc->oldcwd);
}
if (nc->oldns > -1) {
close(nc->oldns);
nc->oldns = -1;
}
if (nc->newns > -1) {
close(nc->newns);
nc->newns = -1;
}
}
char *nsinfo__realpath(const char *path, struct nsinfo *nsi)
{
char *rpath;
struct nscookie nsc;
nsinfo__mountns_enter(nsi, &nsc);
rpath = realpath(path, NULL);
nsinfo__mountns_exit(&nsc);
return rpath;
}