bpf: Check map->usercnt after timer->timer is assigned

[ Upstream commit fd381ce60a ]

When there are concurrent uref release and bpf timer init operations,
the following sequence diagram is possible. It will break the guarantee
provided by bpf_timer: bpf_timer will still be alive after userspace
application releases or unpins the map. It also will lead to kmemleak
for old kernel version which doesn't release bpf_timer when map is
released.

bpf program X:

bpf_timer_init()
  lock timer->lock
    read timer->timer as NULL
    read map->usercnt != 0

                process Y:

                close(map_fd)
                  // put last uref
                  bpf_map_put_uref()
                    atomic_dec_and_test(map->usercnt)
                      array_map_free_timers()
                        bpf_timer_cancel_and_free()
                          // just return
                          read timer->timer is NULL

    t = bpf_map_kmalloc_node()
    timer->timer = t
  unlock timer->lock

Fix the problem by checking map->usercnt after timer->timer is assigned,
so when there are concurrent uref release and bpf timer init, either
bpf_timer_cancel_and_free() from uref release reads a no-NULL timer
or the newly-added atomic64_read() returns a zero usercnt.

Because atomic_dec_and_test(map->usercnt) and READ_ONCE(timer->timer)
in bpf_timer_cancel_and_free() are not protected by a lock, so add
a memory barrier to guarantee the order between map->usercnt and
timer->timer. Also use WRITE_ONCE(timer->timer, x) to match the lockless
read of timer->timer in bpf_timer_cancel_and_free().

Reported-by: Hsin-Wei Hung <hsinweih@uci.edu>
Closes: https://lore.kernel.org/bpf/CABcoxUaT2k9hWsS1tNgXyoU3E-=PuOgMn737qK984fbFmfYixQ@mail.gmail.com
Fixes: b00628b1c7 ("bpf: Introduce bpf timers.")
Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20231030063616.1653024-1-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Hou Tao 2023-10-30 14:36:16 +08:00 committed by Greg Kroah-Hartman
parent 3907b89cd1
commit 8943083bbb

View File

@ -1118,13 +1118,6 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map
ret = -EBUSY; ret = -EBUSY;
goto out; goto out;
} }
if (!atomic64_read(&map->usercnt)) {
/* maps with timers must be either held by user space
* or pinned in bpffs.
*/
ret = -EPERM;
goto out;
}
/* allocate hrtimer via map_kmalloc to use memcg accounting */ /* allocate hrtimer via map_kmalloc to use memcg accounting */
t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node); t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node);
if (!t) { if (!t) {
@ -1137,7 +1130,21 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map
rcu_assign_pointer(t->callback_fn, NULL); rcu_assign_pointer(t->callback_fn, NULL);
hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
t->timer.function = bpf_timer_cb; t->timer.function = bpf_timer_cb;
timer->timer = t; WRITE_ONCE(timer->timer, t);
/* Guarantee the order between timer->timer and map->usercnt. So
* when there are concurrent uref release and bpf timer init, either
* bpf_timer_cancel_and_free() called by uref release reads a no-NULL
* timer or atomic64_read() below returns a zero usercnt.
*/
smp_mb();
if (!atomic64_read(&map->usercnt)) {
/* maps with timers must be either held by user space
* or pinned in bpffs.
*/
WRITE_ONCE(timer->timer, NULL);
kfree(t);
ret = -EPERM;
}
out: out:
__bpf_spin_unlock_irqrestore(&timer->lock); __bpf_spin_unlock_irqrestore(&timer->lock);
return ret; return ret;
@ -1305,7 +1312,7 @@ void bpf_timer_cancel_and_free(void *val)
/* The subsequent bpf_timer_start/cancel() helpers won't be able to use /* The subsequent bpf_timer_start/cancel() helpers won't be able to use
* this timer, since it won't be initialized. * this timer, since it won't be initialized.
*/ */
timer->timer = NULL; WRITE_ONCE(timer->timer, NULL);
out: out:
__bpf_spin_unlock_irqrestore(&timer->lock); __bpf_spin_unlock_irqrestore(&timer->lock);
if (!t) if (!t)