mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-19 12:24:34 +08:00
e00c7b216f
1) The test_lru_map and test_lru_dist fails building on my machine since the sys/resource.h header is not included. 2) test_verifier fails in one test case where we try to call an invalid function, since the verifier log output changed wrt printing function names. 3) Current selftest suite code relies on sysconf(_SC_NPROCESSORS_CONF) for retrieving the number of possible CPUs. This is broken at least in our scenario and really just doesn't work. glibc tries a number of things for retrieving _SC_NPROCESSORS_CONF. First it tries equivalent of /sys/devices/system/cpu/cpu[0-9]* | wc -l, if that fails, depending on the config, it either tries to count CPUs in /proc/cpuinfo, or returns the _SC_NPROCESSORS_ONLN value instead. If /proc/cpuinfo has some issue, it returns just 1 worst case. This oddity is nothing new [1], but semantics/behaviour seems to be settled. _SC_NPROCESSORS_ONLN will parse /sys/devices/system/cpu/online, if that fails it looks into /proc/stat for cpuX entries, and if also that fails for some reason, /proc/cpuinfo is consulted (and returning 1 if unlikely all breaks down). While that might match num_possible_cpus() from the kernel in some cases, it's really not guaranteed with CPU hotplugging, and can result in a buffer overflow since the array in user space could have too few number of slots, and on perpcu map lookup, the kernel will write beyond that memory of the value buffer. William Tu reported such mismatches: [...] The fact that sysconf(_SC_NPROCESSORS_CONF) != num_possible_cpu() happens when CPU hotadd is enabled. For example, in Fusion when setting vcpu.hotadd = "TRUE" or in KVM, setting ./qemu-system-x86_64 -smp 2, maxcpus=4 ... the num_possible_cpu() will be 4 and sysconf() will be 2 [2]. [...] Documentation/cputopology.txt says /sys/devices/system/cpu/possible outputs cpu_possible_mask. That is the same as in num_possible_cpus(), so first step would be to fix the _SC_NPROCESSORS_CONF calls with our own implementation. Later, we could add support to bpf(2) for passing a mask via CPU_SET(3), for example, to just select a subset of CPUs. BPF samples code needs this fix as well (at least so that people stop copying this). Thus, define bpf_num_possible_cpus() once in selftests and import it from there for the sample code to avoid duplicating it. The remaining sysconf(_SC_NPROCESSORS_CONF) in samples are unrelated. After all three issues are fixed, the test suite runs fine again: # make run_tests | grep self selftests: test_verifier [PASS] selftests: test_maps [PASS] selftests: test_lru_map [PASS] selftests: test_kmod.sh [PASS] [1] https://www.sourceware.org/ml/libc-alpha/2011-06/msg00079.html [2] https://www.mail-archive.com/netdev@vger.kernel.org/msg121183.html Fixes:3059303f59
("samples/bpf: update tracex[23] examples to use per-cpu maps") Fixes:86af8b4191
("Add sample for adding simple drop program to link") Fixes:df570f5772
("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_ARRAY") Fixes:e155967179
("samples/bpf: unit test for BPF_MAP_TYPE_PERCPU_HASH") Fixes:ebb676daa1
("bpf: Print function name in addition to function id") Fixes:5db58faf98
("bpf: Add tests for the LRU bpf_htab") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Cc: William Tu <u9012063@gmail.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
542 lines
12 KiB
C
542 lines
12 KiB
C
/*
|
|
* Copyright (c) 2016 Facebook
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of version 2 of the GNU General Public
|
|
* License as published by the Free Software Foundation.
|
|
*/
|
|
#define _GNU_SOURCE
|
|
#include <linux/types.h>
|
|
#include <stdio.h>
|
|
#include <unistd.h>
|
|
#include <linux/bpf.h>
|
|
#include <errno.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
#include <sched.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/resource.h>
|
|
#include <fcntl.h>
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
|
|
#include "libbpf.h"
|
|
#include "bpf_util.h"
|
|
|
|
#define min(a, b) ((a) < (b) ? (a) : (b))
|
|
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
|
|
#define container_of(ptr, type, member) ({ \
|
|
const typeof( ((type *)0)->member ) *__mptr = (ptr); \
|
|
(type *)( (char *)__mptr - offsetof(type,member) );})
|
|
|
|
static int nr_cpus;
|
|
static unsigned long long *dist_keys;
|
|
static unsigned int dist_key_counts;
|
|
|
|
struct list_head {
|
|
struct list_head *next, *prev;
|
|
};
|
|
|
|
static inline void INIT_LIST_HEAD(struct list_head *list)
|
|
{
|
|
list->next = list;
|
|
list->prev = list;
|
|
}
|
|
|
|
static inline int list_empty(const struct list_head *head)
|
|
{
|
|
return head->next == head;
|
|
}
|
|
|
|
static inline void __list_add(struct list_head *new,
|
|
struct list_head *prev,
|
|
struct list_head *next)
|
|
{
|
|
next->prev = new;
|
|
new->next = next;
|
|
new->prev = prev;
|
|
prev->next = new;
|
|
}
|
|
|
|
static inline void list_add(struct list_head *new, struct list_head *head)
|
|
{
|
|
__list_add(new, head, head->next);
|
|
}
|
|
|
|
static inline void __list_del(struct list_head *prev, struct list_head *next)
|
|
{
|
|
next->prev = prev;
|
|
prev->next = next;
|
|
}
|
|
|
|
static inline void __list_del_entry(struct list_head *entry)
|
|
{
|
|
__list_del(entry->prev, entry->next);
|
|
}
|
|
|
|
static inline void list_move(struct list_head *list, struct list_head *head)
|
|
{
|
|
__list_del_entry(list);
|
|
list_add(list, head);
|
|
}
|
|
|
|
#define list_entry(ptr, type, member) \
|
|
container_of(ptr, type, member)
|
|
|
|
#define list_last_entry(ptr, type, member) \
|
|
list_entry((ptr)->prev, type, member)
|
|
|
|
struct pfect_lru_node {
|
|
struct list_head list;
|
|
unsigned long long key;
|
|
};
|
|
|
|
struct pfect_lru {
|
|
struct list_head list;
|
|
struct pfect_lru_node *free_nodes;
|
|
unsigned int cur_size;
|
|
unsigned int lru_size;
|
|
unsigned int nr_unique;
|
|
unsigned int nr_misses;
|
|
unsigned int total;
|
|
int map_fd;
|
|
};
|
|
|
|
static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size,
|
|
unsigned int nr_possible_elems)
|
|
{
|
|
lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
|
|
sizeof(unsigned long long),
|
|
sizeof(struct pfect_lru_node *),
|
|
nr_possible_elems, 0);
|
|
assert(lru->map_fd != -1);
|
|
|
|
lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node));
|
|
assert(lru->free_nodes);
|
|
|
|
INIT_LIST_HEAD(&lru->list);
|
|
lru->cur_size = 0;
|
|
lru->lru_size = lru_size;
|
|
lru->nr_unique = lru->nr_misses = lru->total = 0;
|
|
}
|
|
|
|
static void pfect_lru_destroy(struct pfect_lru *lru)
|
|
{
|
|
close(lru->map_fd);
|
|
free(lru->free_nodes);
|
|
}
|
|
|
|
static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
|
|
unsigned long long key)
|
|
{
|
|
struct pfect_lru_node *node = NULL;
|
|
int seen = 0;
|
|
|
|
lru->total++;
|
|
if (!bpf_lookup_elem(lru->map_fd, &key, &node)) {
|
|
if (node) {
|
|
list_move(&node->list, &lru->list);
|
|
return 1;
|
|
}
|
|
seen = 1;
|
|
}
|
|
|
|
if (lru->cur_size < lru->lru_size) {
|
|
node = &lru->free_nodes[lru->cur_size++];
|
|
INIT_LIST_HEAD(&node->list);
|
|
} else {
|
|
struct pfect_lru_node *null_node = NULL;
|
|
|
|
node = list_last_entry(&lru->list,
|
|
struct pfect_lru_node,
|
|
list);
|
|
bpf_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST);
|
|
}
|
|
|
|
node->key = key;
|
|
list_move(&node->list, &lru->list);
|
|
|
|
lru->nr_misses++;
|
|
if (seen) {
|
|
assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_EXIST));
|
|
} else {
|
|
lru->nr_unique++;
|
|
assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST));
|
|
}
|
|
|
|
return seen;
|
|
}
|
|
|
|
static unsigned int read_keys(const char *dist_file,
|
|
unsigned long long **keys)
|
|
{
|
|
struct stat fst;
|
|
unsigned long long *retkeys;
|
|
unsigned int counts = 0;
|
|
int dist_fd;
|
|
char *b, *l;
|
|
int i;
|
|
|
|
dist_fd = open(dist_file, 0);
|
|
assert(dist_fd != -1);
|
|
|
|
assert(fstat(dist_fd, &fst) == 0);
|
|
b = malloc(fst.st_size);
|
|
assert(b);
|
|
|
|
assert(read(dist_fd, b, fst.st_size) == fst.st_size);
|
|
close(dist_fd);
|
|
for (i = 0; i < fst.st_size; i++) {
|
|
if (b[i] == '\n')
|
|
counts++;
|
|
}
|
|
counts++; /* in case the last line has no \n */
|
|
|
|
retkeys = malloc(counts * sizeof(unsigned long long));
|
|
assert(retkeys);
|
|
|
|
counts = 0;
|
|
for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n"))
|
|
retkeys[counts++] = strtoull(l, NULL, 10);
|
|
free(b);
|
|
|
|
*keys = retkeys;
|
|
|
|
return counts;
|
|
}
|
|
|
|
static int create_map(int map_type, int map_flags, unsigned int size)
|
|
{
|
|
int map_fd;
|
|
|
|
map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
|
|
sizeof(unsigned long long), size, map_flags);
|
|
|
|
if (map_fd == -1)
|
|
perror("bpf_create_map");
|
|
|
|
return map_fd;
|
|
}
|
|
|
|
static int sched_next_online(int pid, int next_to_try)
|
|
{
|
|
cpu_set_t cpuset;
|
|
|
|
if (next_to_try == nr_cpus)
|
|
return -1;
|
|
|
|
while (next_to_try < nr_cpus) {
|
|
CPU_ZERO(&cpuset);
|
|
CPU_SET(next_to_try++, &cpuset);
|
|
if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
|
|
break;
|
|
}
|
|
|
|
return next_to_try;
|
|
}
|
|
|
|
static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data),
|
|
void *data)
|
|
{
|
|
int next_sched_cpu = 0;
|
|
pid_t pid[tasks];
|
|
int i;
|
|
|
|
for (i = 0; i < tasks; i++) {
|
|
pid[i] = fork();
|
|
if (pid[i] == 0) {
|
|
next_sched_cpu = sched_next_online(0, next_sched_cpu);
|
|
fn(i, data);
|
|
exit(0);
|
|
} else if (pid[i] == -1) {
|
|
printf("couldn't spawn #%d process\n", i);
|
|
exit(1);
|
|
}
|
|
/* It is mostly redundant and just allow the parent
|
|
* process to update next_shced_cpu for the next child
|
|
* process
|
|
*/
|
|
next_sched_cpu = sched_next_online(pid[i], next_sched_cpu);
|
|
}
|
|
for (i = 0; i < tasks; i++) {
|
|
int status;
|
|
|
|
assert(waitpid(pid[i], &status, 0) == pid[i]);
|
|
assert(status == 0);
|
|
}
|
|
}
|
|
|
|
static void do_test_lru_dist(int task, void *data)
|
|
{
|
|
unsigned int nr_misses = 0;
|
|
struct pfect_lru pfect_lru;
|
|
unsigned long long key, value = 1234;
|
|
unsigned int i;
|
|
|
|
unsigned int lru_map_fd = ((unsigned int *)data)[0];
|
|
unsigned int lru_size = ((unsigned int *)data)[1];
|
|
unsigned long long key_offset = task * dist_key_counts;
|
|
|
|
pfect_lru_init(&pfect_lru, lru_size, dist_key_counts);
|
|
|
|
for (i = 0; i < dist_key_counts; i++) {
|
|
key = dist_keys[i] + key_offset;
|
|
|
|
pfect_lru_lookup_or_insert(&pfect_lru, key);
|
|
|
|
if (!bpf_lookup_elem(lru_map_fd, &key, &value))
|
|
continue;
|
|
|
|
if (bpf_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) {
|
|
printf("bpf_update_elem(lru_map_fd, %llu): errno:%d\n",
|
|
key, errno);
|
|
assert(0);
|
|
}
|
|
|
|
nr_misses++;
|
|
}
|
|
|
|
printf(" task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
|
|
task, pfect_lru.nr_unique, dist_key_counts, nr_misses,
|
|
dist_key_counts);
|
|
printf(" task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
|
|
task, pfect_lru.nr_unique, pfect_lru.total,
|
|
pfect_lru.nr_misses, pfect_lru.total);
|
|
|
|
pfect_lru_destroy(&pfect_lru);
|
|
close(lru_map_fd);
|
|
}
|
|
|
|
static void test_parallel_lru_dist(int map_type, int map_flags,
|
|
int nr_tasks, unsigned int lru_size)
|
|
{
|
|
int child_data[2];
|
|
int lru_map_fd;
|
|
|
|
printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
|
|
map_flags);
|
|
|
|
if (map_flags & BPF_F_NO_COMMON_LRU)
|
|
lru_map_fd = create_map(map_type, map_flags,
|
|
nr_cpus * lru_size);
|
|
else
|
|
lru_map_fd = create_map(map_type, map_flags,
|
|
nr_tasks * lru_size);
|
|
assert(lru_map_fd != -1);
|
|
|
|
child_data[0] = lru_map_fd;
|
|
child_data[1] = lru_size;
|
|
|
|
run_parallel(nr_tasks, do_test_lru_dist, child_data);
|
|
|
|
close(lru_map_fd);
|
|
}
|
|
|
|
static void test_lru_loss0(int map_type, int map_flags)
|
|
{
|
|
unsigned long long key, value[nr_cpus];
|
|
unsigned int old_unused_losses = 0;
|
|
unsigned int new_unused_losses = 0;
|
|
unsigned int used_losses = 0;
|
|
int map_fd;
|
|
|
|
printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
|
|
map_flags);
|
|
|
|
assert(sched_next_online(0, 0) != -1);
|
|
|
|
if (map_flags & BPF_F_NO_COMMON_LRU)
|
|
map_fd = create_map(map_type, map_flags, 900 * nr_cpus);
|
|
else
|
|
map_fd = create_map(map_type, map_flags, 900);
|
|
|
|
assert(map_fd != -1);
|
|
|
|
value[0] = 1234;
|
|
|
|
for (key = 1; key <= 1000; key++) {
|
|
int start_key, end_key;
|
|
|
|
assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);
|
|
|
|
start_key = 101;
|
|
end_key = min(key, 900);
|
|
|
|
while (start_key <= end_key) {
|
|
bpf_lookup_elem(map_fd, &start_key, value);
|
|
start_key++;
|
|
}
|
|
}
|
|
|
|
for (key = 1; key <= 1000; key++) {
|
|
if (bpf_lookup_elem(map_fd, &key, value)) {
|
|
if (key <= 100)
|
|
old_unused_losses++;
|
|
else if (key <= 900)
|
|
used_losses++;
|
|
else
|
|
new_unused_losses++;
|
|
}
|
|
}
|
|
|
|
close(map_fd);
|
|
|
|
printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) "
|
|
"newer-elem-losses:%d(/100)\n",
|
|
old_unused_losses, used_losses, new_unused_losses);
|
|
}
|
|
|
|
static void test_lru_loss1(int map_type, int map_flags)
|
|
{
|
|
unsigned long long key, value[nr_cpus];
|
|
int map_fd;
|
|
unsigned int nr_losses = 0;
|
|
|
|
printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
|
|
map_flags);
|
|
|
|
assert(sched_next_online(0, 0) != -1);
|
|
|
|
if (map_flags & BPF_F_NO_COMMON_LRU)
|
|
map_fd = create_map(map_type, map_flags, 1000 * nr_cpus);
|
|
else
|
|
map_fd = create_map(map_type, map_flags, 1000);
|
|
|
|
assert(map_fd != -1);
|
|
|
|
value[0] = 1234;
|
|
|
|
for (key = 1; key <= 1000; key++)
|
|
assert(!bpf_update_elem(map_fd, &key, value, BPF_NOEXIST));
|
|
|
|
for (key = 1; key <= 1000; key++) {
|
|
if (bpf_lookup_elem(map_fd, &key, value))
|
|
nr_losses++;
|
|
}
|
|
|
|
close(map_fd);
|
|
|
|
printf("nr_losses:%d(/1000)\n", nr_losses);
|
|
}
|
|
|
|
static void do_test_parallel_lru_loss(int task, void *data)
|
|
{
|
|
const unsigned int nr_stable_elems = 1000;
|
|
const unsigned int nr_repeats = 100000;
|
|
|
|
int map_fd = *(int *)data;
|
|
unsigned long long stable_base;
|
|
unsigned long long key, value[nr_cpus];
|
|
unsigned long long next_ins_key;
|
|
unsigned int nr_losses = 0;
|
|
unsigned int i;
|
|
|
|
stable_base = task * nr_repeats * 2 + 1;
|
|
next_ins_key = stable_base;
|
|
value[0] = 1234;
|
|
for (i = 0; i < nr_stable_elems; i++) {
|
|
assert(bpf_update_elem(map_fd, &next_ins_key, value,
|
|
BPF_NOEXIST) == 0);
|
|
next_ins_key++;
|
|
}
|
|
|
|
for (i = 0; i < nr_repeats; i++) {
|
|
int rn;
|
|
|
|
rn = rand();
|
|
|
|
if (rn % 10) {
|
|
key = rn % nr_stable_elems + stable_base;
|
|
bpf_lookup_elem(map_fd, &key, value);
|
|
} else {
|
|
bpf_update_elem(map_fd, &next_ins_key, value,
|
|
BPF_NOEXIST);
|
|
next_ins_key++;
|
|
}
|
|
}
|
|
|
|
key = stable_base;
|
|
for (i = 0; i < nr_stable_elems; i++) {
|
|
if (bpf_lookup_elem(map_fd, &key, value))
|
|
nr_losses++;
|
|
key++;
|
|
}
|
|
|
|
printf(" task:%d nr_losses:%u\n", task, nr_losses);
|
|
}
|
|
|
|
static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
|
|
{
|
|
int map_fd;
|
|
|
|
printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
|
|
map_flags);
|
|
|
|
/* Give 20% more than the active working set */
|
|
if (map_flags & BPF_F_NO_COMMON_LRU)
|
|
map_fd = create_map(map_type, map_flags,
|
|
nr_cpus * (1000 + 200));
|
|
else
|
|
map_fd = create_map(map_type, map_flags,
|
|
nr_tasks * (1000 + 200));
|
|
|
|
assert(map_fd != -1);
|
|
|
|
run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd);
|
|
|
|
close(map_fd);
|
|
}
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
|
|
int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
|
|
const char *dist_file;
|
|
int nr_tasks = 1;
|
|
int lru_size;
|
|
int f;
|
|
|
|
if (argc < 4) {
|
|
printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n",
|
|
argv[0]);
|
|
return -1;
|
|
}
|
|
|
|
dist_file = argv[1];
|
|
lru_size = atoi(argv[2]);
|
|
nr_tasks = atoi(argv[3]);
|
|
|
|
setbuf(stdout, NULL);
|
|
|
|
assert(!setrlimit(RLIMIT_MEMLOCK, &r));
|
|
|
|
srand(time(NULL));
|
|
|
|
nr_cpus = bpf_num_possible_cpus();
|
|
assert(nr_cpus != -1);
|
|
printf("nr_cpus:%d\n\n", nr_cpus);
|
|
|
|
nr_tasks = min(nr_tasks, nr_cpus);
|
|
|
|
dist_key_counts = read_keys(dist_file, &dist_keys);
|
|
if (!dist_key_counts) {
|
|
printf("%s has no key\n", dist_file);
|
|
return -1;
|
|
}
|
|
|
|
for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
|
|
test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
|
|
test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
|
|
test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
|
|
nr_tasks);
|
|
test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
|
|
nr_tasks, lru_size);
|
|
printf("\n");
|
|
}
|
|
|
|
free(dist_keys);
|
|
|
|
return 0;
|
|
}
|