linux/Documentation/bpf/map_lru_hash_update.dot
Joe Stringer 1a986518b8 docs/bpf: Add LRU internals description and graph
Extend the bpf hashmap docs to include a brief description of the
internals of the LRU map type (setting appropriate API expectations),
including the original commit message from Martin and a variant on the
graph that I had presented during my Linux Plumbers Conference 2022
talk on "Pressure feedback for LRU map types"[0].

The node names in the dot file correspond roughly to the functions
where the logic for those decisions or steps is defined, to help
curious developers to cross-reference and update this logic if the
details of the LRU implementation ever differ from this description.

  [0] https://lpc.events/event/16/contributions/1368/

Signed-off-by: Joe Stringer <joe@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20230422172054.3355436-2-joe@isovalent.com
2023-04-27 14:20:44 +02:00

173 lines
7.4 KiB
Plaintext

// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2022-2023 Isovalent, Inc.
digraph {
node [colorscheme=accent4,style=filled] # Apply colorscheme to all nodes
graph [splines=ortho, nodesep=1]
subgraph cluster_key {
label = "Key\n(locks held during operation)";
rankdir = TB;
remote_lock [shape=rectangle,fillcolor=4,label="remote CPU LRU lock"]
hash_lock [shape=rectangle,fillcolor=3,label="hashtab lock"]
lru_lock [shape=rectangle,fillcolor=2,label="LRU lock"]
local_lock [shape=rectangle,fillcolor=1,label="local CPU LRU lock"]
no_lock [shape=rectangle,label="no locks held"]
}
begin [shape=oval,label="begin\nbpf_map_update()"]
// Nodes below with an 'fn_' prefix are roughly labeled by the C function
// names that initiate the corresponding logic in kernel/bpf/bpf_lru_list.c.
// Number suffixes and errno suffixes handle subsections of the corresponding
// logic in the function as of the writing of this dot.
// cf. __local_list_pop_free() / bpf_percpu_lru_pop_free()
local_freelist_check [shape=diamond,fillcolor=1,
label="Local freelist\nnode available?"];
use_local_node [shape=rectangle,
label="Use node owned\nby this CPU"]
// cf. bpf_lru_pop_free()
common_lru_check [shape=diamond,
label="Map created with\ncommon LRU?\n(!BPF_F_NO_COMMON_LRU)"];
fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2,
label="Flush local pending,
Rotate Global list, move
LOCAL_FREE_TARGET
from global -> local"]
// Also corresponds to:
// fn__local_list_flush()
// fn_bpf_lru_list_rotate()
fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2,
label="Able to free\nLOCAL_FREE_TARGET\nnodes?"]
fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3,
label="Shrink inactive list
up to remaining
LOCAL_FREE_TARGET
(global LRU -> local)"]
fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2,
label="> 0 entries in\nlocal free list?"]
fn___bpf_lru_list_shrink2 [shape=rectangle,fillcolor=2,
label="Steal one node from
inactive, or if empty,
from active global list"]
fn___bpf_lru_list_shrink3 [shape=rectangle,fillcolor=3,
label="Try to remove\nnode from hashtab"]
local_freelist_check2 [shape=diamond,label="Htab removal\nsuccessful?"]
common_lru_check2 [shape=diamond,
label="Map created with\ncommon LRU?\n(!BPF_F_NO_COMMON_LRU)"];
subgraph cluster_remote_lock {
label = "Iterate through CPUs\n(start from current)";
style = dashed;
rankdir=LR;
local_freelist_check5 [shape=diamond,fillcolor=4,
label="Steal a node from\nper-cpu freelist?"]
local_freelist_check6 [shape=rectangle,fillcolor=4,
label="Steal a node from
(1) Unreferenced pending, or
(2) Any pending node"]
local_freelist_check7 [shape=rectangle,fillcolor=3,
label="Try to remove\nnode from hashtab"]
fn_htab_lru_map_update_elem [shape=diamond,
label="Stole node\nfrom remote\nCPU?"]
fn_htab_lru_map_update_elem2 [shape=diamond,label="Iterated\nall CPUs?"]
// Also corresponds to:
// use_local_node()
// fn__local_list_pop_pending()
}
fn_bpf_lru_list_pop_free_to_local2 [shape=rectangle,
label="Use node that was\nnot recently referenced"]
local_freelist_check4 [shape=rectangle,
label="Use node that was\nactively referenced\nin global list"]
fn_htab_lru_map_update_elem_ENOMEM [shape=oval,label="return -ENOMEM"]
fn_htab_lru_map_update_elem3 [shape=rectangle,
label="Use node that was\nactively referenced\nin (another?) CPU's cache"]
fn_htab_lru_map_update_elem4 [shape=rectangle,fillcolor=3,
label="Update hashmap\nwith new element"]
fn_htab_lru_map_update_elem5 [shape=oval,label="return 0"]
fn_htab_lru_map_update_elem_EBUSY [shape=oval,label="return -EBUSY"]
fn_htab_lru_map_update_elem_EEXIST [shape=oval,label="return -EEXIST"]
fn_htab_lru_map_update_elem_ENOENT [shape=oval,label="return -ENOENT"]
begin -> local_freelist_check
local_freelist_check -> use_local_node [xlabel="Y"]
local_freelist_check -> common_lru_check [xlabel="N"]
common_lru_check -> fn_bpf_lru_list_pop_free_to_local [xlabel="Y"]
common_lru_check -> fn___bpf_lru_list_shrink_inactive [xlabel="N"]
fn_bpf_lru_list_pop_free_to_local -> fn___bpf_lru_node_move_to_free
fn___bpf_lru_node_move_to_free ->
fn_bpf_lru_list_pop_free_to_local2 [xlabel="Y"]
fn___bpf_lru_node_move_to_free ->
fn___bpf_lru_list_shrink_inactive [xlabel="N"]
fn___bpf_lru_list_shrink_inactive -> fn___bpf_lru_list_shrink
fn___bpf_lru_list_shrink -> fn_bpf_lru_list_pop_free_to_local2 [xlabel = "Y"]
fn___bpf_lru_list_shrink -> fn___bpf_lru_list_shrink2 [xlabel="N"]
fn___bpf_lru_list_shrink2 -> fn___bpf_lru_list_shrink3
fn___bpf_lru_list_shrink3 -> local_freelist_check2
local_freelist_check2 -> local_freelist_check4 [xlabel = "Y"]
local_freelist_check2 -> common_lru_check2 [xlabel = "N"]
common_lru_check2 -> local_freelist_check5 [xlabel = "Y"]
common_lru_check2 -> fn_htab_lru_map_update_elem_ENOMEM [xlabel = "N"]
local_freelist_check5 -> fn_htab_lru_map_update_elem [xlabel = "Y"]
local_freelist_check5 -> local_freelist_check6 [xlabel = "N"]
local_freelist_check6 -> local_freelist_check7
local_freelist_check7 -> fn_htab_lru_map_update_elem
fn_htab_lru_map_update_elem -> fn_htab_lru_map_update_elem3 [xlabel = "Y"]
fn_htab_lru_map_update_elem -> fn_htab_lru_map_update_elem2 [xlabel = "N"]
fn_htab_lru_map_update_elem2 ->
fn_htab_lru_map_update_elem_ENOMEM [xlabel = "Y"]
fn_htab_lru_map_update_elem2 -> local_freelist_check5 [xlabel = "N"]
fn_htab_lru_map_update_elem3 -> fn_htab_lru_map_update_elem4
use_local_node -> fn_htab_lru_map_update_elem4
fn_bpf_lru_list_pop_free_to_local2 -> fn_htab_lru_map_update_elem4
local_freelist_check4 -> fn_htab_lru_map_update_elem4
fn_htab_lru_map_update_elem4 -> fn_htab_lru_map_update_elem5 [headlabel="Success"]
fn_htab_lru_map_update_elem4 ->
fn_htab_lru_map_update_elem_EBUSY [xlabel="Hashtab lock failed"]
fn_htab_lru_map_update_elem4 ->
fn_htab_lru_map_update_elem_EEXIST [xlabel="BPF_EXIST set and\nkey already exists"]
fn_htab_lru_map_update_elem4 ->
fn_htab_lru_map_update_elem_ENOENT [headlabel="BPF_NOEXIST set\nand no such entry"]
// Create invisible pad nodes to line up various nodes
pad0 [style=invis]
pad1 [style=invis]
pad2 [style=invis]
pad3 [style=invis]
pad4 [style=invis]
// Line up the key with the top of the graph
no_lock -> local_lock [style=invis]
local_lock -> lru_lock [style=invis]
lru_lock -> hash_lock [style=invis]
hash_lock -> remote_lock [style=invis]
remote_lock -> local_freelist_check5 [style=invis]
remote_lock -> fn___bpf_lru_list_shrink [style=invis]
// Line up return code nodes at the bottom of the graph
fn_htab_lru_map_update_elem -> pad0 [style=invis]
pad0 -> pad1 [style=invis]
pad1 -> pad2 [style=invis]
//pad2-> fn_htab_lru_map_update_elem_ENOMEM [style=invis]
fn_htab_lru_map_update_elem4 -> pad3 [style=invis]
pad3 -> fn_htab_lru_map_update_elem5 [style=invis]
pad3 -> fn_htab_lru_map_update_elem_EBUSY [style=invis]
pad3 -> fn_htab_lru_map_update_elem_EEXIST [style=invis]
pad3 -> fn_htab_lru_map_update_elem_ENOENT [style=invis]
// Reduce diagram width by forcing some nodes to appear above others
local_freelist_check4 -> fn_htab_lru_map_update_elem3 [style=invis]
common_lru_check2 -> pad4 [style=invis]
pad4 -> local_freelist_check5 [style=invis]
}