Merge branch 'bpf-lockdep-fixes'

Alexei Starovoitov says:

====================
v1->v2:
- reworded 2nd patch. It's a real dead lock. Not a false positive
- dropped the lockdep fix for up_read_non_owner in bpf_get_stackid

In addition to preempt_disable patch for socket filters
https://patchwork.ozlabs.org/patch/1032437/
First patch fixes lockdep false positive in percpu_freelist
Second patch fixes potential deadlock in bpf_prog_register
Third patch fixes another potential deadlock in stackmap access
from tracing bpf prog and from syscall.
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
Daniel Borkmann 2019-01-31 23:18:22 +01:00
commit f01c280387
5 changed files with 47 additions and 28 deletions

View File

@ -686,7 +686,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
} }
if (htab_is_prealloc(htab)) { if (htab_is_prealloc(htab)) {
pcpu_freelist_push(&htab->freelist, &l->fnode); __pcpu_freelist_push(&htab->freelist, &l->fnode);
} else { } else {
atomic_dec(&htab->count); atomic_dec(&htab->count);
l->htab = htab; l->htab = htab;
@ -748,7 +748,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
} else { } else {
struct pcpu_freelist_node *l; struct pcpu_freelist_node *l;
l = pcpu_freelist_pop(&htab->freelist); l = __pcpu_freelist_pop(&htab->freelist);
if (!l) if (!l)
return ERR_PTR(-E2BIG); return ERR_PTR(-E2BIG);
l_new = container_of(l, struct htab_elem, fnode); l_new = container_of(l, struct htab_elem, fnode);

View File

@ -28,8 +28,8 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s)
free_percpu(s->freelist); free_percpu(s->freelist);
} }
static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
struct pcpu_freelist_node *node) struct pcpu_freelist_node *node)
{ {
raw_spin_lock(&head->lock); raw_spin_lock(&head->lock);
node->next = head->first; node->next = head->first;
@ -37,12 +37,22 @@ static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,
raw_spin_unlock(&head->lock); raw_spin_unlock(&head->lock);
} }
void pcpu_freelist_push(struct pcpu_freelist *s, void __pcpu_freelist_push(struct pcpu_freelist *s,
struct pcpu_freelist_node *node) struct pcpu_freelist_node *node)
{ {
struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist); struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
__pcpu_freelist_push(head, node); ___pcpu_freelist_push(head, node);
}
void pcpu_freelist_push(struct pcpu_freelist *s,
struct pcpu_freelist_node *node)
{
unsigned long flags;
local_irq_save(flags);
__pcpu_freelist_push(s, node);
local_irq_restore(flags);
} }
void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
@ -63,7 +73,7 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
again: again:
head = per_cpu_ptr(s->freelist, cpu); head = per_cpu_ptr(s->freelist, cpu);
__pcpu_freelist_push(head, buf); ___pcpu_freelist_push(head, buf);
i++; i++;
buf += elem_size; buf += elem_size;
if (i == nr_elems) if (i == nr_elems)
@ -74,14 +84,12 @@ again:
local_irq_restore(flags); local_irq_restore(flags);
} }
struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
{ {
struct pcpu_freelist_head *head; struct pcpu_freelist_head *head;
struct pcpu_freelist_node *node; struct pcpu_freelist_node *node;
unsigned long flags;
int orig_cpu, cpu; int orig_cpu, cpu;
local_irq_save(flags);
orig_cpu = cpu = raw_smp_processor_id(); orig_cpu = cpu = raw_smp_processor_id();
while (1) { while (1) {
head = per_cpu_ptr(s->freelist, cpu); head = per_cpu_ptr(s->freelist, cpu);
@ -89,16 +97,25 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
node = head->first; node = head->first;
if (node) { if (node) {
head->first = node->next; head->first = node->next;
raw_spin_unlock_irqrestore(&head->lock, flags); raw_spin_unlock(&head->lock);
return node; return node;
} }
raw_spin_unlock(&head->lock); raw_spin_unlock(&head->lock);
cpu = cpumask_next(cpu, cpu_possible_mask); cpu = cpumask_next(cpu, cpu_possible_mask);
if (cpu >= nr_cpu_ids) if (cpu >= nr_cpu_ids)
cpu = 0; cpu = 0;
if (cpu == orig_cpu) { if (cpu == orig_cpu)
local_irq_restore(flags);
return NULL; return NULL;
}
} }
} }
struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
{
struct pcpu_freelist_node *ret;
unsigned long flags;
local_irq_save(flags);
ret = __pcpu_freelist_pop(s);
local_irq_restore(flags);
return ret;
}

View File

@ -22,8 +22,12 @@ struct pcpu_freelist_node {
struct pcpu_freelist_node *next; struct pcpu_freelist_node *next;
}; };
/* pcpu_freelist_* do spin_lock_irqsave. */
void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *); struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *);
/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */
void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *);
void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
u32 nr_elems); u32 nr_elems);
int pcpu_freelist_init(struct pcpu_freelist *); int pcpu_freelist_init(struct pcpu_freelist *);

View File

@ -713,8 +713,13 @@ static int map_lookup_elem(union bpf_attr *attr)
if (bpf_map_is_dev_bound(map)) { if (bpf_map_is_dev_bound(map)) {
err = bpf_map_offload_lookup_elem(map, key, value); err = bpf_map_offload_lookup_elem(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || goto done;
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { }
preempt_disable();
this_cpu_inc(bpf_prog_active);
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
err = bpf_percpu_hash_copy(map, key, value); err = bpf_percpu_hash_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_copy(map, key, value); err = bpf_percpu_array_copy(map, key, value);
@ -744,7 +749,10 @@ static int map_lookup_elem(union bpf_attr *attr)
} }
rcu_read_unlock(); rcu_read_unlock();
} }
this_cpu_dec(bpf_prog_active);
preempt_enable();
done:
if (err) if (err)
goto free_value; goto free_value;

View File

@ -1204,22 +1204,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{ {
int err; return __bpf_probe_register(btp, prog);
mutex_lock(&bpf_event_mutex);
err = __bpf_probe_register(btp, prog);
mutex_unlock(&bpf_event_mutex);
return err;
} }
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{ {
int err; return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
mutex_lock(&bpf_event_mutex);
err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
mutex_unlock(&bpf_event_mutex);
return err;
} }
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,