Merge branch 'bpf-next'

Alexei Starovoitov says: ==================== bpf: add search pruning optimization and tests patch #1 commit log explains why eBPF verifier has to examine some instructions multiple times and describes the search pruning optimization that improves verification speed for branchy programs and allows more complex programs to be verified successfully. This patch completes the core verifier logic. patch #2 adds more verifier tests related to branches and search pruning I'm still working on Andy's 'bitmask for stack slots' suggestion. It will be done on top of this patch. The current verifier algorithm is brute force depth first search with state pruning. If anyone can come up with another algorithm that demonstrates better results, we'll replace the algorithm without affecting user space. Note verifier doesn't guarantee that all possible valid programs are accepted. Overly complex programs may still be rejected. Verifier improvements/optimizations will guarantee that if a program was passing verification in the past, it will still be passing. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2024-11-28 14:44:10 +08:00 · 2014-10-01 21:30:46 -04:00 · 2014-10-01 21:30:46 -04:00 · f44d61cdd3
commit f44d61cdd3
parent 1b7bde6d65 fd10c2ef3e
2 changed files with 276 additions and 0 deletions
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@ -199,6 +199,7 @@ struct verifier_env {
 	struct verifier_stack_elem *head; /* stack of verifier states to be processed */
 	int stack_size;			/* number of states to be processed */
 	struct verifier_state cur_state; /* current verifier state */
+	struct verifier_state_list **explored_states; /* search pruning optimization */
 	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
 	u32 used_map_cnt;		/* number of used maps */
 };
@ -1219,6 +1220,8 @@ enum {
 	BRANCH = 2,
 };

+#define STATE_LIST_MARK ((struct verifier_state_list *) -1L)
+
 static int *insn_stack;	/* stack of insns to process */
 static int cur_stack;	/* current stack index */
 static int *insn_state;
@ -1241,6 +1244,10 @@ static int push_insn(int t, int w, int e, struct verifier_env *env)
 		return -EINVAL;
 	}

+	if (e == BRANCH)
+		/* mark branch target for state pruning */
+		env->explored_states[w] = STATE_LIST_MARK;
+
 	if (insn_state[w] == 0) {
 		/* tree-edge */
 		insn_state[t] = DISCOVERED | e;
@ -1314,6 +1321,10 @@ peek_stack:
 				goto peek_stack;
 			else if (ret < 0)
 				goto err_free;
+			/* tell verifier to check for equivalent states
+			 * after every call and jump
+			 */
+			env->explored_states[t + 1] = STATE_LIST_MARK;
 		} else {
 			/* conditional jump with two edges */
 			ret = push_insn(t, t + 1, FALLTHROUGH, env);
@ -1364,6 +1375,95 @@ err_free:
 	return ret;
 }

+/* compare two verifier states
+ *
+ * all states stored in state_list are known to be valid, since
+ * verifier reached 'bpf_exit' instruction through them
+ *
+ * this function is called when verifier exploring different branches of
+ * execution popped from the state stack. If it sees an old state that has
+ * more strict register state and more strict stack state then this execution
+ * branch doesn't need to be explored further, since verifier already
+ * concluded that more strict state leads to valid finish.
+ *
+ * Therefore two states are equivalent if register state is more conservative
+ * and explored stack state is more conservative than the current one.
+ * Example:
+ *       explored                   current
+ * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
+ * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
+ *
+ * In other words if current stack state (one being explored) has more
+ * valid slots than old one that already passed validation, it means
+ * the verifier can stop exploring and conclude that current state is valid too
+ *
+ * Similarly with registers. If explored state has register type as invalid
+ * whereas register type in current state is meaningful, it means that
+ * the current state will reach 'bpf_exit' instruction safely
+ */
+static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
+{
+	int i;
+
+	for (i = 0; i < MAX_BPF_REG; i++) {
+		if (memcmp(&old->regs[i], &cur->regs[i],
+			   sizeof(old->regs[0])) != 0) {
+			if (old->regs[i].type == NOT_INIT ||
+			    old->regs[i].type == UNKNOWN_VALUE)
+				continue;
+			return false;
+		}
+	}
+
+	for (i = 0; i < MAX_BPF_STACK; i++) {
+		if (memcmp(&old->stack[i], &cur->stack[i],
+			   sizeof(old->stack[0])) != 0) {
+			if (old->stack[i].stype == STACK_INVALID)
+				continue;
+			return false;
+		}
+	}
+	return true;
+}
+
+static int is_state_visited(struct verifier_env *env, int insn_idx)
+{
+	struct verifier_state_list *new_sl;
+	struct verifier_state_list *sl;
+
+	sl = env->explored_states[insn_idx];
+	if (!sl)
+		/* this 'insn_idx' instruction wasn't marked, so we will not
+		 * be doing state search here
+		 */
+		return 0;
+
+	while (sl != STATE_LIST_MARK) {
+		if (states_equal(&sl->state, &env->cur_state))
+			/* reached equivalent register/stack state,
+			 * prune the search
+			 */
+			return 1;
+		sl = sl->next;
+	}
+
+	/* there were no equivalent states, remember current one.
+	 * technically the current state is not proven to be safe yet,
+	 * but it will either reach bpf_exit (which means it's safe) or
+	 * it will be rejected. Since there are no loops, we won't be
+	 * seeing this 'insn_idx' instruction again on the way to bpf_exit
+	 */
+	new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER);
+	if (!new_sl)
+		return -ENOMEM;
+
+	/* add new state to the head of linked list */
+	memcpy(&new_sl->state, &env->cur_state, sizeof(env->cur_state));
+	new_sl->next = env->explored_states[insn_idx];
+	env->explored_states[insn_idx] = new_sl;
+	return 0;
+}
+
 static int do_check(struct verifier_env *env)
 {
 	struct verifier_state *state = &env->cur_state;
@ -1396,6 +1496,21 @@ static int do_check(struct verifier_env *env)
 			return -E2BIG;
 		}

+		err = is_state_visited(env, insn_idx);
+		if (err < 0)
+			return err;
+		if (err == 1) {
+			/* found equivalent state, can prune the search */
+			if (log_level) {
+				if (do_print_state)
+					verbose("\nfrom %d to %d: safe\n",
+						prev_insn_idx, insn_idx);
+				else
+					verbose("%d: safe\n", insn_idx);
+			}
+			goto process_bpf_exit;
+		}
+
 		if (log_level && do_print_state) {
 			verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx);
 			print_verifier_state(env);
@ -1531,6 +1646,7 @@ static int do_check(struct verifier_env *env)
 				if (err)
 					return err;

+process_bpf_exit:
 				insn_idx = pop_stack(env, &prev_insn_idx);
 				if (insn_idx < 0) {
 					break;
@ -1671,6 +1787,28 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
 			insn->src_reg = 0;
 }

+static void free_states(struct verifier_env *env)
+{
+	struct verifier_state_list *sl, *sln;
+	int i;
+
+	if (!env->explored_states)
+		return;
+
+	for (i = 0; i < env->prog->len; i++) {
+		sl = env->explored_states[i];
+
+		if (sl)
+			while (sl != STATE_LIST_MARK) {
+				sln = sl->next;
+				kfree(sl);
+				sl = sln;
+			}
+	}
+
+	kfree(env->explored_states);
+}
+
 int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
 {
 	char __user *log_ubuf = NULL;
@ -1719,6 +1857,13 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
 	if (ret < 0)
 		goto skip_full_check;

+	env->explored_states = kcalloc(prog->len,
+				       sizeof(struct verifier_state_list *),
+				       GFP_USER);
+	ret = -ENOMEM;
+	if (!env->explored_states)
+		goto skip_full_check;
+
 	ret = check_cfg(env);
 	if (ret < 0)
 		goto skip_full_check;
@ -1727,6 +1872,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)

 skip_full_check:
 	while (pop_stack(env, NULL) >= 0);
+	free_states(env);

 	if (log_level && log_len >= log_size - 1) {
 		BUG_ON(log_len >= log_size);
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@ -461,6 +461,136 @@ static struct bpf_test tests[] = {
 		.errstr = "R0 invalid mem access",
 		.result = REJECT,
 	},
+	{
+		"jump test 1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -8),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 5),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"jump test 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 14),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 11),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 8),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 5),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"jump test 3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 19),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 3),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 15),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 3),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 11),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 3),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -40),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 7),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 3),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -56),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_unspec),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {24},
+		.result = ACCEPT,
+	},
+	{
+		"jump test 4",
+		.insns = {
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
 };

 static int probe_filter_length(struct bpf_insn *fp)