linux/tools/bpf/bpftool/pids.c
Quentin Monnet 6b4384ff10 Revert "bpftool: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK"
This reverts commit a777e18f1b.

In commit a777e18f1b ("bpftool: Use libbpf 1.0 API mode instead of
RLIMIT_MEMLOCK"), we removed the rlimit bump in bpftool, because the
kernel has switched to memcg-based memory accounting. Thanks to the
LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK, we attempted to keep compatibility
with other systems and ask libbpf to raise the limit for us if
necessary.

How do we know if memcg-based accounting is supported? There is a probe
in libbpf to check this. But this probe currently relies on the
availability of a given BPF helper, bpf_ktime_get_coarse_ns(), which
landed in the same kernel version as the memory accounting change. This
works in the generic case, but it may fail, for example, if the helper
function has been backported to an older kernel. This has been observed
for Google Cloud's Container-Optimized OS (COS), where the helper is
available but rlimit is still in use. The probe succeeds, the rlimit is
not raised, and probing features with bpftool, for example, fails.

A patch was submitted [0] to update this probe in libbpf, based on what
the cilium/ebpf Go library does [1]. It would lower the soft rlimit to
0, attempt to load a BPF object, and reset the rlimit. But it may induce
some hard-to-debug flakiness if another process starts, or the current
application is killed, while the rlimit is reduced, and the approach was
discarded.

As a workaround to ensure that the rlimit bump does not depend on the
availability of a given helper, we restore the unconditional rlimit bump
in bpftool for now.

  [0] https://lore.kernel.org/bpf/20220609143614.97837-1-quentin@isovalent.com/
  [1] https://github.com/cilium/ebpf/blob/v0.9.0/rlimit/rlimit.go#L39

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Yafang Shao <laoar.shao@gmail.com>
Cc: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20220610112648.29695-2-quentin@isovalent.com
2022-06-14 22:18:06 +02:00

257 lines
5.6 KiB
C

// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2020 Facebook */
#include <errno.h>
#include <linux/err.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/hashmap.h>
#include "main.h"
#include "skeleton/pid_iter.h"
#ifdef BPFTOOL_WITHOUT_SKELETONS
int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type)
{
return -ENOTSUP;
}
void delete_obj_refs_table(struct hashmap *map) {}
void emit_obj_refs_plain(struct hashmap *map, __u32 id, const char *prefix) {}
void emit_obj_refs_json(struct hashmap *map, __u32 id, json_writer_t *json_writer) {}
#else /* BPFTOOL_WITHOUT_SKELETONS */
#include "pid_iter.skel.h"
static void add_ref(struct hashmap *map, struct pid_iter_entry *e)
{
struct hashmap_entry *entry;
struct obj_refs *refs;
struct obj_ref *ref;
int err, i;
void *tmp;
hashmap__for_each_key_entry(map, entry, u32_as_hash_field(e->id)) {
refs = entry->value;
for (i = 0; i < refs->ref_cnt; i++) {
if (refs->refs[i].pid == e->pid)
return;
}
tmp = realloc(refs->refs, (refs->ref_cnt + 1) * sizeof(*ref));
if (!tmp) {
p_err("failed to re-alloc memory for ID %u, PID %d, COMM %s...",
e->id, e->pid, e->comm);
return;
}
refs->refs = tmp;
ref = &refs->refs[refs->ref_cnt];
ref->pid = e->pid;
memcpy(ref->comm, e->comm, sizeof(ref->comm));
refs->ref_cnt++;
return;
}
/* new ref */
refs = calloc(1, sizeof(*refs));
if (!refs) {
p_err("failed to alloc memory for ID %u, PID %d, COMM %s...",
e->id, e->pid, e->comm);
return;
}
refs->refs = malloc(sizeof(*refs->refs));
if (!refs->refs) {
free(refs);
p_err("failed to alloc memory for ID %u, PID %d, COMM %s...",
e->id, e->pid, e->comm);
return;
}
ref = &refs->refs[0];
ref->pid = e->pid;
memcpy(ref->comm, e->comm, sizeof(ref->comm));
refs->ref_cnt = 1;
refs->has_bpf_cookie = e->has_bpf_cookie;
refs->bpf_cookie = e->bpf_cookie;
err = hashmap__append(map, u32_as_hash_field(e->id), refs);
if (err)
p_err("failed to append entry to hashmap for ID %u: %s",
e->id, strerror(errno));
}
static int __printf(2, 0)
libbpf_print_none(__maybe_unused enum libbpf_print_level level,
__maybe_unused const char *format,
__maybe_unused va_list args)
{
return 0;
}
int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type)
{
struct pid_iter_entry *e;
char buf[4096 / sizeof(*e) * sizeof(*e)];
struct pid_iter_bpf *skel;
int err, ret, fd = -1, i;
libbpf_print_fn_t default_print;
*map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL);
if (IS_ERR(*map)) {
p_err("failed to create hashmap for PID references");
return -1;
}
set_max_rlimit();
skel = pid_iter_bpf__open();
if (!skel) {
p_err("failed to open PID iterator skeleton");
return -1;
}
skel->rodata->obj_type = type;
/* we don't want output polluted with libbpf errors if bpf_iter is not
* supported
*/
default_print = libbpf_set_print(libbpf_print_none);
err = pid_iter_bpf__load(skel);
libbpf_set_print(default_print);
if (err) {
/* too bad, kernel doesn't support BPF iterators yet */
err = 0;
goto out;
}
err = pid_iter_bpf__attach(skel);
if (err) {
/* if we loaded above successfully, attach has to succeed */
p_err("failed to attach PID iterator: %d", err);
goto out;
}
fd = bpf_iter_create(bpf_link__fd(skel->links.iter));
if (fd < 0) {
err = -errno;
p_err("failed to create PID iterator session: %d", err);
goto out;
}
while (true) {
ret = read(fd, buf, sizeof(buf));
if (ret < 0) {
if (errno == EAGAIN)
continue;
err = -errno;
p_err("failed to read PID iterator output: %d", err);
goto out;
}
if (ret == 0)
break;
if (ret % sizeof(*e)) {
err = -EINVAL;
p_err("invalid PID iterator output format");
goto out;
}
ret /= sizeof(*e);
e = (void *)buf;
for (i = 0; i < ret; i++, e++) {
add_ref(*map, e);
}
}
err = 0;
out:
if (fd >= 0)
close(fd);
pid_iter_bpf__destroy(skel);
return err;
}
void delete_obj_refs_table(struct hashmap *map)
{
struct hashmap_entry *entry;
size_t bkt;
if (!map)
return;
hashmap__for_each_entry(map, entry, bkt) {
struct obj_refs *refs = entry->value;
free(refs->refs);
free(refs);
}
hashmap__free(map);
}
void emit_obj_refs_json(struct hashmap *map, __u32 id,
json_writer_t *json_writer)
{
struct hashmap_entry *entry;
if (hashmap__empty(map))
return;
hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) {
struct obj_refs *refs = entry->value;
int i;
if (refs->ref_cnt == 0)
break;
if (refs->has_bpf_cookie)
jsonw_lluint_field(json_writer, "bpf_cookie", refs->bpf_cookie);
jsonw_name(json_writer, "pids");
jsonw_start_array(json_writer);
for (i = 0; i < refs->ref_cnt; i++) {
struct obj_ref *ref = &refs->refs[i];
jsonw_start_object(json_writer);
jsonw_int_field(json_writer, "pid", ref->pid);
jsonw_string_field(json_writer, "comm", ref->comm);
jsonw_end_object(json_writer);
}
jsonw_end_array(json_writer);
break;
}
}
void emit_obj_refs_plain(struct hashmap *map, __u32 id, const char *prefix)
{
struct hashmap_entry *entry;
if (hashmap__empty(map))
return;
hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) {
struct obj_refs *refs = entry->value;
int i;
if (refs->ref_cnt == 0)
break;
if (refs->has_bpf_cookie)
printf("\n\tbpf_cookie %llu", (unsigned long long) refs->bpf_cookie);
printf("%s", prefix);
for (i = 0; i < refs->ref_cnt; i++) {
struct obj_ref *ref = &refs->refs[i];
printf("%s%s(%d)", i == 0 ? "" : ", ", ref->comm, ref->pid);
}
break;
}
}
#endif