From abcdb978ea13010357ef2bf265f7bf0c55f96dbe Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 22 Nov 2023 19:18:34 +0000 Subject: [PATCH 1/4] trace2: fix signature of trace2_def_param() macro Add `struct key_value_info` argument to `trace2_def_param()`. In dc90208497 (trace2: plumb config kvi, 2023-06-28) a `kvi` argument was added to `trace2_def_param_fl()` but the macro was not up updated. Let's fix that. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- trace2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trace2.h b/trace2.h index 40d8c2e02a..1f0669bbd2 100644 --- a/trace2.h +++ b/trace2.h @@ -337,8 +337,8 @@ struct key_value_info; void trace2_def_param_fl(const char *file, int line, const char *param, const char *value, const struct key_value_info *kvi); -#define trace2_def_param(param, value) \ - trace2_def_param_fl(__FILE__, __LINE__, (param), (value)) +#define trace2_def_param(param, value, kvi) \ + trace2_def_param_fl(__FILE__, __LINE__, (param), (value), (kvi)) /* * Tell trace2 about a newly instantiated repo object and assign From b7d49ac1ecd05669edada26b990eee677a7d3c25 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 22 Nov 2023 19:18:35 +0000 Subject: [PATCH 2/4] trace2: redact passwords from https:// URLs by default It is an unsafe practice to call something like git clone https://user:password@example.com/ This not only risks leaking the password "over the shoulder" or into the readline history of the current Unix shell, it also gets logged via Trace2 if enabled. Let's at least avoid logging such secrets via Trace2, much like we avoid logging secrets in `http.c`. Much like the code in `http.c` is guarded via `GIT_TRACE_REDACT` (defaulting to `true`), we guard the new code via `GIT_TRACE2_REDACT` (also defaulting to `true`). The new tests added in this commit uncover leaks in `builtin/clone.c` and `remote.c`. Therefore we need to turn off `TEST_PASSES_SANITIZE_LEAK`. The reasons: - We observed that `the_repository->remote_status` is not released properly. - We are using `url...insteadOf` and that runs into a code path where an allocated URL is replaced with another URL, and the original URL is never released. - `remote_states` contains plenty of `struct remote`s whose refspecs seem to be usually allocated by never released. More investigation is needed here to identify the exact cause and proper fixes for these leaks/bugs. Co-authored-by: Jeff Hostetler Signed-off-by: Jeff Hostetler Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t0210-trace2-normal.sh | 20 ++++++- trace2.c | 120 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 136 insertions(+), 4 deletions(-) diff --git a/t/t0210-trace2-normal.sh b/t/t0210-trace2-normal.sh index 80e76a4695..c312657a12 100755 --- a/t/t0210-trace2-normal.sh +++ b/t/t0210-trace2-normal.sh @@ -2,7 +2,7 @@ test_description='test trace2 facility (normal target)' -TEST_PASSES_SANITIZE_LEAK=true +TEST_PASSES_SANITIZE_LEAK=false . ./test-lib.sh # Turn off any inherited trace2 settings for this test. @@ -283,4 +283,22 @@ test_expect_success 'using global config with include' ' test_cmp expect actual ' +test_expect_success 'unsafe URLs are redacted by default' ' + test_when_finished \ + "rm -r trace.normal unredacted.normal clone clone2" && + + test_config_global \ + "url.$(pwd).insteadOf" https://user:pwd@example.com/ && + test_config_global trace2.configParams "core.*,remote.*.url" && + + GIT_TRACE2="$(pwd)/trace.normal" \ + git clone https://user:pwd@example.com/ clone && + ! grep user:pwd trace.normal && + + GIT_TRACE2_REDACT=0 GIT_TRACE2="$(pwd)/unredacted.normal" \ + git clone https://user:pwd@example.com/ clone2 && + grep "start .* clone https://user:pwd@example.com" unredacted.normal && + grep "remote.origin.url=https://user:pwd@example.com" unredacted.normal +' + test_done diff --git a/trace2.c b/trace2.c index 6dc74dff4c..87d9a3a036 100644 --- a/trace2.c +++ b/trace2.c @@ -20,6 +20,7 @@ #include "trace2/tr2_tmr.h" static int trace2_enabled; +static int trace2_redact = 1; static int tr2_next_child_id; /* modify under lock */ static int tr2_next_exec_id; /* modify under lock */ @@ -227,6 +228,8 @@ void trace2_initialize_fl(const char *file, int line) if (!tr2_tgt_want_builtins()) return; trace2_enabled = 1; + if (!git_env_bool("GIT_TRACE2_REDACT", 1)) + trace2_redact = 0; tr2_sid_get(); @@ -247,12 +250,93 @@ int trace2_is_enabled(void) return trace2_enabled; } +/* + * Redacts an argument, i.e. ensures that no password in + * https://user:password@host/-style URLs is logged. + * + * Returns the original if nothing needed to be redacted. + * Returns a pointer that needs to be `free()`d otherwise. + */ +static const char *redact_arg(const char *arg) +{ + const char *p, *colon; + size_t at; + + if (!trace2_redact || + (!skip_prefix(arg, "https://", &p) && + !skip_prefix(arg, "http://", &p))) + return arg; + + at = strcspn(p, "@/"); + if (p[at] != '@') + return arg; + + colon = memchr(p, ':', at); + if (!colon) + return arg; + + return xstrfmt("%.*s:%s", (int)(colon - arg), arg, p + at); +} + +/* + * Redacts arguments in an argument list. + * + * Returns the original if nothing needed to be redacted. + * Otherwise, returns a new array that needs to be released + * via `free_redacted_argv()`. + */ +static const char **redact_argv(const char **argv) +{ + int i, j; + const char *redacted = NULL; + const char **ret; + + if (!trace2_redact) + return argv; + + for (i = 0; argv[i]; i++) + if ((redacted = redact_arg(argv[i])) != argv[i]) + break; + + if (!argv[i]) + return argv; + + for (j = 0; argv[j]; j++) + ; /* keep counting */ + + ALLOC_ARRAY(ret, j + 1); + ret[j] = NULL; + + for (j = 0; j < i; j++) + ret[j] = argv[j]; + ret[i] = redacted; + for (++i; argv[i]; i++) { + redacted = redact_arg(argv[i]); + ret[i] = redacted ? redacted : argv[i]; + } + + return ret; +} + +static void free_redacted_argv(const char **redacted, const char **argv) +{ + int i; + + if (redacted != argv) { + for (i = 0; argv[i]; i++) + if (redacted[i] != argv[i]) + free((void *)redacted[i]); + free((void *)redacted); + } +} + void trace2_cmd_start_fl(const char *file, int line, const char **argv) { struct tr2_tgt *tgt_j; int j; uint64_t us_now; uint64_t us_elapsed_absolute; + const char **redacted; if (!trace2_enabled) return; @@ -260,10 +344,14 @@ void trace2_cmd_start_fl(const char *file, int line, const char **argv) us_now = getnanotime() / 1000; us_elapsed_absolute = tr2tls_absolute_elapsed(us_now); + redacted = redact_argv(argv); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_start_fl) tgt_j->pfn_start_fl(file, line, us_elapsed_absolute, - argv); + redacted); + + free_redacted_argv(redacted, argv); } void trace2_cmd_exit_fl(const char *file, int line, int code) @@ -409,6 +497,7 @@ void trace2_child_start_fl(const char *file, int line, int j; uint64_t us_now; uint64_t us_elapsed_absolute; + const char **orig_argv = cmd->args.v; if (!trace2_enabled) return; @@ -419,10 +508,24 @@ void trace2_child_start_fl(const char *file, int line, cmd->trace2_child_id = tr2tls_locked_increment(&tr2_next_child_id); cmd->trace2_child_us_start = us_now; + /* + * The `pfn_child_start_fl` API takes a `struct child_process` + * rather than a simple `argv` for the child because some + * targets make use of the additional context bits/values. So + * temporarily replace the original argv (inside the `strvec`) + * with a possibly redacted version. + */ + cmd->args.v = redact_argv(orig_argv); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_child_start_fl) tgt_j->pfn_child_start_fl(file, line, us_elapsed_absolute, cmd); + + if (cmd->args.v != orig_argv) { + free_redacted_argv(cmd->args.v, orig_argv); + cmd->args.v = orig_argv; + } } void trace2_child_exit_fl(const char *file, int line, struct child_process *cmd, @@ -493,6 +596,7 @@ int trace2_exec_fl(const char *file, int line, const char *exe, int exec_id; uint64_t us_now; uint64_t us_elapsed_absolute; + const char **redacted; if (!trace2_enabled) return -1; @@ -502,10 +606,14 @@ int trace2_exec_fl(const char *file, int line, const char *exe, exec_id = tr2tls_locked_increment(&tr2_next_exec_id); + redacted = redact_argv(argv); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_exec_fl) tgt_j->pfn_exec_fl(file, line, us_elapsed_absolute, - exec_id, exe, argv); + exec_id, exe, redacted); + + free_redacted_argv(redacted, argv); return exec_id; } @@ -637,13 +745,19 @@ void trace2_def_param_fl(const char *file, int line, const char *param, { struct tr2_tgt *tgt_j; int j; + const char *redacted; if (!trace2_enabled) return; + redacted = redact_arg(value); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_param_fl) - tgt_j->pfn_param_fl(file, line, param, value, kvi); + tgt_j->pfn_param_fl(file, line, param, redacted, kvi); + + if (redacted != value) + free((void *)redacted); } void trace2_def_repo_fl(const char *file, int line, struct repository *repo) From c73e7f80d32073b6fafec553d5875e720287e216 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 22 Nov 2023 19:18:36 +0000 Subject: [PATCH 3/4] t0211: test URL redacting in PERF format This transmogrifies the test case that was just added to t0210, to also cover the `GIT_TRACE2_PERF` backend. Just like t0211, we now have to toggle the `TEST_PASSES_SANITIZE_LEAK` annotation. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- t/t0211-trace2-perf.sh | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/t/t0211-trace2-perf.sh b/t/t0211-trace2-perf.sh index cfba686132..290b6eaaab 100755 --- a/t/t0211-trace2-perf.sh +++ b/t/t0211-trace2-perf.sh @@ -2,7 +2,7 @@ test_description='test trace2 facility (perf target)' -TEST_PASSES_SANITIZE_LEAK=true +TEST_PASSES_SANITIZE_LEAK=false . ./test-lib.sh # Turn off any inherited trace2 settings for this test. @@ -268,4 +268,23 @@ test_expect_success PTHREADS 'global counter test/test2' ' have_counter_event "main" "counter" "test" "test2" 60 actual ' +test_expect_success 'unsafe URLs are redacted by default' ' + test_when_finished \ + "rm -r actual trace.perf unredacted.perf clone clone2" && + + test_config_global \ + "url.$(pwd).insteadOf" https://user:pwd@example.com/ && + test_config_global trace2.configParams "core.*,remote.*.url" && + + GIT_TRACE2_PERF="$(pwd)/trace.perf" \ + git clone https://user:pwd@example.com/ clone && + ! grep user:pwd trace.perf && + + GIT_TRACE2_REDACT=0 GIT_TRACE2_PERF="$(pwd)/unredacted.perf" \ + git clone https://user:pwd@example.com/ clone2 && + perl "$TEST_DIRECTORY/t0211/scrub_perf.perl" actual && + grep "d0|main|start|.* clone https://user:pwd@example.com" actual && + grep "d0|main|def_param|.*|remote.origin.url:https://user:pwd@example.com" actual +' + test_done From 16fa3eebc0b92dc2aa5d875c2f1338644e3c5b99 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 22 Nov 2023 19:18:37 +0000 Subject: [PATCH 4/4] t0212: test URL redacting in EVENT format In the added tests cases, skip testing the `GIT_TRACE2_REDACT=0` case because we would need to exactly model the full JSON event stream like we did in the preceding basic tests and I do not think it is worth it. Furthermore, the Trace2 routines print the same content in normal, perf, or event format, and in t0210 and t0211 we already tested the basic functionality, so no need to repeat it here. In this test, we use the test-helper to unit test each of the event messages where URLs can appear and confirm that they are redacted in each event. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- t/helper/test-trace2.c | 55 +++++++++++++++++++++++++++++++++++++++++ t/t0212-trace2-event.sh | 40 ++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/t/helper/test-trace2.c b/t/helper/test-trace2.c index 20c7495f38..778ac9125b 100644 --- a/t/helper/test-trace2.c +++ b/t/helper/test-trace2.c @@ -412,6 +412,56 @@ static int ut_201counter(int argc, const char **argv) return 0; } +static int ut_300redact_start(int argc, const char **argv) +{ + if (!argc) + die("expect "); + + trace2_cmd_start(argv); + + return 0; +} + +static int ut_301redact_child_start(int argc, const char **argv) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + int k; + + if (!argc) + die("expect "); + + for (k = 0; argv[k]; k++) + strvec_push(&cmd.args, argv[k]); + + trace2_child_start(&cmd); + + strvec_clear(&cmd.args); + + return 0; +} + +static int ut_302redact_exec(int argc, const char **argv) +{ + if (!argc) + die("expect "); + + trace2_exec(argv[0], &argv[1]); + + return 0; +} + +static int ut_303redact_def_param(int argc, const char **argv) +{ + struct key_value_info kvi = KVI_INIT; + + if (argc < 2) + die("expect "); + + trace2_def_param(argv[0], argv[1], &kvi); + + return 0; +} + /* * Usage: * test-tool trace2 @@ -438,6 +488,11 @@ static struct unit_test ut_table[] = { { ut_200counter, "200counter", " [ [ [...]]]" }, { ut_201counter, "201counter", " " }, + + { ut_300redact_start, "300redact_start", "" }, + { ut_301redact_child_start, "301redact_child_start", "" }, + { ut_302redact_exec, "302redact_exec", " " }, + { ut_303redact_def_param, "303redact_def_param", " " }, }; /* clang-format on */ diff --git a/t/t0212-trace2-event.sh b/t/t0212-trace2-event.sh index 6d3374ff77..147643d582 100755 --- a/t/t0212-trace2-event.sh +++ b/t/t0212-trace2-event.sh @@ -323,4 +323,44 @@ test_expect_success 'discard traces when there are too many files' ' head -n2 trace_target_dir/git-trace2-discard | tail -n1 | grep \"event\":\"too_many_files\" ' +# In the following "...redact..." tests, skip testing the GIT_TRACE2_REDACT=0 +# case because we would need to exactly model the full JSON event stream like +# we did in the basic tests above and I do not think it is worth it. + +test_expect_success 'unsafe URLs are redacted by default in cmd_start events' ' + test_when_finished \ + "rm -r trace.event" && + + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + test-tool trace2 300redact_start git clone https://user:pwd@example.com/ clone2 && + ! grep user:pwd trace.event +' + +test_expect_success 'unsafe URLs are redacted by default in child_start events' ' + test_when_finished \ + "rm -r trace.event" && + + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + test-tool trace2 301redact_child_start git clone https://user:pwd@example.com/ clone2 && + ! grep user:pwd trace.event +' + +test_expect_success 'unsafe URLs are redacted by default in exec events' ' + test_when_finished \ + "rm -r trace.event" && + + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + test-tool trace2 302redact_exec git clone https://user:pwd@example.com/ clone2 && + ! grep user:pwd trace.event +' + +test_expect_success 'unsafe URLs are redacted by default in def_param events' ' + test_when_finished \ + "rm -r trace.event" && + + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + test-tool trace2 303redact_def_param url https://user:pwd@example.com/ && + ! grep user:pwd trace.event +' + test_done