From 5069211e2f0b47e75119805e23ae6352d871e263 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 19 Oct 2023 10:26:42 +0200 Subject: [PATCH] perf trace: Use the right bpf_probe_read(_str) variant for reading user data Perf test case 111 Check open filename arg using perf trace + vfs_getname fails on s390. This is caused by a failing function bpf_probe_read() in file util/bpf_skel/augmented_raw_syscalls.bpf.c. The root cause is the lookup by address. Function bpf_probe_read() is used. This function works only for architectures with ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE. On s390 it is not possible to determine from the address which address space the address belongs to (user or kernel space). Replace bpf_probe_read() by bpf_probe_read_user() or bpf_probe_read_kernel(), and bpf_probe_read_str() by bpf_probe_read_user_str(), to explicitly specify the address space the address refers to. Output before: # ./perf trace -eopen,openat -- touch /tmp/111 libbpf: prog 'sys_enter': BPF program load failed: Invalid argument libbpf: prog 'sys_enter': -- BEGIN PROG LOAD LOG -- reg type unsupported for arg#0 function sys_enter#75 0: R1=ctx(off=0,imm=0) R10=fp0 ; int sys_enter(struct syscall_enter_args *args) 0: (bf) r6 = r1 ; R1=ctx(off=0,imm=0) R6_w=ctx(off=0,imm=0) ; return bpf_get_current_pid_tgid(); 1: (85) call bpf_get_current_pid_tgid#14 ; R0_w=scalar() 2: (63) *(u32 *)(r10 -8) = r0 ; R0_w=scalar() R10=fp0 fp-8=????mmmm 3: (bf) r2 = r10 ; R2_w=fp0 R10=fp0 ; ..... lines deleted here ..... 23: (bf) r3 = r6 ; R3_w=ctx(off=0,imm=0) R6=ctx(off=0,imm=0) 24: (85) call bpf_probe_read#4 unknown func bpf_probe_read#4 processed 23 insns (limit 1000000) max_states_per_insn 0 \ total_states 2 peak_states 2 mark_read 2 -- END PROG LOAD LOG -- libbpf: prog 'sys_enter': failed to load: -22 libbpf: failed to load object 'augmented_raw_syscalls_bpf' libbpf: failed to load BPF skeleton 'augmented_raw_syscalls_bpf': -22 .... 
Output after: # ./perf test -Fv 111 111: Check open filename arg using perf trace + vfs_getname : --- start --- 1.085 ( 0.011 ms): touch/320753 openat(dfd: CWD, filename: \ "/tmp/temporary_file.SWH85", \ flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3 ---- end ---- Check open filename arg using perf trace + vfs_getname: Ok # Test with the sleep command shows: Output before: # ./perf trace -e *sleep sleep 1.234567890 0.000 (1234.681 ms): sleep/63114 clock_nanosleep(rqtp: \ { .tv_sec: 0, .tv_nsec: 0 }, rmtp: 0x3ffe0979720) = 0 # Output after: # ./perf trace -e *sleep sleep 1.234567890 0.000 (1234.686 ms): sleep/64277 clock_nanosleep(rqtp: \ { .tv_sec: 1, .tv_nsec: 234567890 }, rmtp: 0x3fff3df9ea0) = 0 # Fixes: 14e4b9f4289a ("perf trace: Raw augmented syscalls fix libbpf 1.0+ compatibility") Signed-off-by: Thomas Richter Co-developed-by: Arnaldo Carvalho de Melo Acked-by: Ilya Leoshkevich Tested-by: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: gor@linux.ibm.com Cc: hca@linux.ibm.com Cc: sumanthk@linux.ibm.com Cc: svens@linux.ibm.com Link: https://lore.kernel.org/r/20231019082642.3286650-1-tmricht@linux.ibm.com Signed-off-by: Namhyung Kim --- .../util/bpf_skel/augmented_raw_syscalls.bpf.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index 939ec769bf4a..52c270330ae0 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -153,7 +153,7 @@ static inline unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len) { unsigned int augmented_len = sizeof(*augmented_arg); - int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg); + int string_len = bpf_probe_read_user_str(&augmented_arg->value, arg_len, arg); augmented_arg->size = augmented_arg->err = 0; /* @@ -203,7 +203,7 @@ int 
sys_enter_connect(struct syscall_enter_args *args) _Static_assert(is_power_of_2(sizeof(augmented_args->saddr)), "sizeof(augmented_args->saddr) needs to be a power of two"); socklen &= sizeof(augmented_args->saddr) - 1; - bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); + bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg); return augmented__output(args, augmented_args, len + socklen); } @@ -221,7 +221,7 @@ int sys_enter_sendto(struct syscall_enter_args *args) socklen &= sizeof(augmented_args->saddr) - 1; - bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); + bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg); return augmented__output(args, augmented_args, len + socklen); } @@ -311,7 +311,7 @@ int sys_enter_perf_event_open(struct syscall_enter_args *args) if (augmented_args == NULL) goto failure; - if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0) + if (bpf_probe_read_user(&augmented_args->__data, sizeof(*attr), attr) < 0) goto failure; attr_read = (const struct perf_event_attr_size *)augmented_args->__data; @@ -325,7 +325,7 @@ int sys_enter_perf_event_open(struct syscall_enter_args *args) goto failure; // Now that we read attr->size and tested it against the size limits, read it completely - if (bpf_probe_read(&augmented_args->__data, size, attr) < 0) + if (bpf_probe_read_user(&augmented_args->__data, size, attr) < 0) goto failure; return augmented__output(args, augmented_args, len + size); @@ -347,7 +347,7 @@ int sys_enter_clock_nanosleep(struct syscall_enter_args *args) if (size > sizeof(augmented_args->__data)) goto failure; - bpf_probe_read(&augmented_args->__data, size, rqtp_arg); + bpf_probe_read_user(&augmented_args->__data, size, rqtp_arg); return augmented__output(args, augmented_args, len + size); failure: @@ -385,7 +385,7 @@ int sys_enter(struct syscall_enter_args *args) if (augmented_args == NULL) return 1; - bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), 
args); + bpf_probe_read_kernel(&augmented_args->args, sizeof(augmented_args->args), args); /* * Jump to syscall specific augmenter, even if the default one, @@ -406,7 +406,7 @@ int sys_exit(struct syscall_exit_args *args) if (pid_filter__has(&pids_filtered, getpid())) return 0; - bpf_probe_read(&exit_args, sizeof(exit_args), args); + bpf_probe_read_kernel(&exit_args, sizeof(exit_args), args); /* * Jump to syscall specific return augmenter, even if the default one, * "!raw_syscalls:unaugmented" that will just return 1 to return the