linux/samples/bpf/tracex5_kern.c
Alexei Starovoitov 5bacd7805a samples/bpf: bpf_tail_call example for tracing
kprobe example that demonstrates how future seccomp programs may look like.
It attaches to seccomp_phase1() function and tail-calls other BPF programs
depending on syscall number.

Existing optimized classic BPF seccomp programs generated by Chrome look like:
if (sd.nr < 121) {
  if (sd.nr < 57) {
    if (sd.nr < 22) {
      if (sd.nr < 7) {
        if (sd.nr < 4) {
          if (sd.nr < 1) {
            check sys_read
          } else {
            if (sd.nr < 3) {
              check sys_write and sys_open
            } else {
              check sys_close
            }
          }
        } else {
      } else {
    } else {
  } else {
} else {
}

the future seccomp using native eBPF may look like:
  bpf_tail_call(&sd, &syscall_jmp_table, sd.nr);
which is simpler, faster and leaves more room for per-syscall checks.

Usage:
$ sudo ./tracex5
<...>-366   [001] d...     4.870033: : read(fd=1, buf=00007f6d5bebf000, size=771)
<...>-369   [003] d...     4.870066: : mmap
<...>-369   [003] d...     4.870077: : syscall=110 (one of get/set uid/pid/gid)
<...>-369   [003] d...     4.870089: : syscall=107 (one of get/set uid/pid/gid)
   sh-369   [000] d...     4.891740: : read(fd=0, buf=00000000023d1000, size=512)
   sh-369   [000] d...     4.891747: : write(fd=1, buf=00000000023d3000, size=512)
   sh-369   [000] d...     4.891747: : read(fd=1, buf=00000000023d3000, size=512)

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-21 17:07:59 -04:00

76 lines
1.9 KiB
C

/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/seccomp.h>
#include "bpf_helpers.h"
#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
struct bpf_map_def SEC("maps") progs = {
.type = BPF_MAP_TYPE_PROG_ARRAY,
.key_size = sizeof(u32),
.value_size = sizeof(u32),
.max_entries = 1024,
};
SEC("kprobe/seccomp_phase1")
int bpf_prog1(struct pt_regs *ctx)
{
struct seccomp_data sd = {};
bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di);
/* dispatch into next BPF program depending on syscall number */
bpf_tail_call(ctx, &progs, sd.nr);
/* fall through -> unknown syscall */
if (sd.nr >= __NR_getuid && sd.nr <= __NR_getsid) {
char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n";
bpf_trace_printk(fmt, sizeof(fmt), sd.nr);
}
return 0;
}
/* we jump here when syscall number == __NR_write */
PROG(__NR_write)(struct pt_regs *ctx)
{
struct seccomp_data sd = {};
bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di);
if (sd.args[2] == 512) {
char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
sd.args[0], sd.args[1], sd.args[2]);
}
return 0;
}
PROG(__NR_read)(struct pt_regs *ctx)
{
struct seccomp_data sd = {};
bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di);
if (sd.args[2] > 128 && sd.args[2] <= 1024) {
char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
sd.args[0], sd.args[1], sd.args[2]);
}
return 0;
}
PROG(__NR_mmap)(struct pt_regs *ctx)
{
char fmt[] = "mmap\n";
bpf_trace_printk(fmt, sizeof(fmt));
return 0;
}
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;