From eb22e7dfa23da6bd9aed9bd1dad69e1e8e167d24 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:15 +0100 Subject: [PATCH 01/30] attr: fix overflow when upserting attribute with overly long name The function `git_attr_internal()` is called to upsert attributes into the global map. And while all callers pass a `size_t`, the function itself accepts an `int` as the attribute name's length. This can lead to an integer overflow in case the attribute name is longer than `INT_MAX`. Now this overflow seems harmless as the first thing we do is to call `attr_name_valid()`, and that function only succeeds in case all chars in the range of `namelen` match a certain small set of chars. We thus can't do an out-of-bounds read as NUL is not part of that set and all strings passed to this function are NUL-terminated. And furthermore, we wouldn't ever read past the current attribute name anyway due to the same reason. And if validation fails we will return early. On the other hand it feels fragile to rely on this behaviour, even more so given that we pass `namelen` to `FLEX_ALLOC_MEM()`. So let's instead just do the correct thing here and accept a `size_t` as line length. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attr.c b/attr.c index 4ef85d668b5..39ce0eb95e9 100644 --- a/attr.c +++ b/attr.c @@ -210,7 +210,7 @@ static void report_invalid_attr(const char *name, size_t len, * dictionary. If no entry is found, create a new attribute and store it in * the dictionary. */ -static const struct git_attr *git_attr_internal(const char *name, int namelen) +static const struct git_attr *git_attr_internal(const char *name, size_t namelen) { struct git_attr *a; From 8d0d48cf2157cfb914db1f53b3fe40785b86f3aa Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:19 +0100 Subject: [PATCH 02/30] attr: fix out-of-bounds read with huge attribute names There is an out-of-bounds read possible when parsing gitattributes that have an attribute that is 2^31+1 bytes long. This is caused due to an integer overflow when we assign the result of strlen(3P) to an `int`, where we use the wrapped-around value in a subsequent call to memcpy(3P). The following code reproduces the issue: blob=$(perl -e 'print "a" x 2147483649 . " attr"' | git hash-object -w --stdin) git update-index --add --cacheinfo 100644,$blob,.gitattributes git check-attr --all file AddressSanitizer:DEADLYSIGNAL ================================================================= ==8451==ERROR: AddressSanitizer: SEGV on unknown address 0x7f93efa00800 (pc 0x7f94f1f8f082 bp 0x7ffddb59b3a0 sp 0x7ffddb59ab28 T0) ==8451==The signal is caused by a READ memory access. #0 0x7f94f1f8f082 (/usr/lib/libc.so.6+0x176082) #1 0x7f94f2047d9c in __interceptor_strspn /usr/src/debug/gcc/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:752 #2 0x560e190f7f26 in parse_attr_line attr.c:375 #3 0x560e190f9663 in handle_attr_line attr.c:660 #4 0x560e190f9ddd in read_attr_from_index attr.c:769 #5 0x560e190f9f14 in read_attr attr.c:797 #6 0x560e190fa24e in bootstrap_attr_stack attr.c:867 #7 0x560e190fa4a5 in prepare_attr_stack attr.c:902 #8 0x560e190fb5dc in collect_some_attrs attr.c:1097 #9 0x560e190fb93f in git_all_attrs attr.c:1128 #10 0x560e18e6136e in check_attr builtin/check-attr.c:67 #11 0x560e18e61c12 in cmd_check_attr builtin/check-attr.c:183 #12 0x560e18e15993 in run_builtin git.c:466 #13 0x560e18e16397 in handle_builtin git.c:721 #14 0x560e18e16b2b in run_argv git.c:788 #15 0x560e18e17991 in cmd_main git.c:926 #16 0x560e190ae2bd in main common-main.c:57 #17 0x7f94f1e3c28f (/usr/lib/libc.so.6+0x2328f) #18 0x7f94f1e3c349 in __libc_start_main (/usr/lib/libc.so.6+0x23349) #19 0x560e18e110e4 in _start ../sysdeps/x86_64/start.S:115 AddressSanitizer can not provide additional info. SUMMARY: AddressSanitizer: SEGV (/usr/lib/libc.so.6+0x176082) ==8451==ABORTING Fix this bug by converting the variable to a `size_t` instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attr.c b/attr.c index 39ce0eb95e9..9d42bc1721f 100644 --- a/attr.c +++ b/attr.c @@ -333,7 +333,7 @@ static const char *parse_attr(const char *src, int lineno, const char *cp, static struct match_attr *parse_attr_line(const char *line, const char *src, int lineno, int macro_ok) { - int namelen; + size_t namelen; int num_attr, i; const char *cp, *name, *states; struct match_attr *res = NULL; From 24557209500e6ed618f04a8795a111a0c491a29c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:23 +0100 Subject: [PATCH 03/30] attr: fix integer overflow when parsing huge attribute names It is possible to trigger an integer overflow when parsing attribute names that are longer than 2^31 bytes because we assign the result of strlen(3P) to an `int` instead of to a `size_t`. This can lead to an abort in vsnprintf(3P) with the following reproducer: blob=$(perl -e 'print "A " . "B"x2147483648 . "\n"' | git hash-object -w --stdin) git update-index --add --cacheinfo 100644,$blob,.gitattributes git check-attr --all path BUG: strbuf.c:400: your vsnprintf is broken (returned -1) But furthermore, assuming that the attribute name is even longer than that, it can cause us to silently truncate the attribute and thus lead to wrong results. Fix this integer overflow by using a `size_t` instead. This fixes the silent truncation of attribute names, but it only partially fixes the BUG we hit: even though the initial BUG is fixed, we can still hit a BUG when parsing invalid attribute lines via `report_invalid_attr()`. This is due to an underlying design issue in vsnprintf(3P) which only knows to return an `int`, and thus it may always overflow with large inputs. This issue is benign though: the worst that can happen is that the error message is misreported to be either truncated or too long, but due to the buffer being NUL terminated we wouldn't ever do an out-of-bounds read here. Reported-by: Markus Vervier Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attr.c b/attr.c index 9d42bc1721f..4a10ba4d94a 100644 --- a/attr.c +++ b/attr.c @@ -289,7 +289,7 @@ static const char *parse_attr(const char *src, int lineno, const char *cp, struct attr_state *e) { const char *ep, *equals; - int len; + size_t len; ep = cp + strcspn(cp, blank); equals = strchr(cp, '='); From 34ace8bad02bb14ecc5b631f7e3daaa7a9bba7d9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:27 +0100 Subject: [PATCH 04/30] attr: fix out-of-bounds write when parsing huge number of attributes It is possible to trigger an integer overflow when parsing attribute names when there are more than 2^31 of them for a single pattern. This can either lead to us dying due to trying to request too many bytes: blob=$(perl -e 'print "f" . " a=" x 2147483649' | git hash-object -w --stdin) git update-index --add --cacheinfo 100644,$blob,.gitattributes git attr-check --all file ================================================================= ==1022==ERROR: AddressSanitizer: requested allocation size 0xfffffff800000032 (0xfffffff800001038 after adjustments for alignment, red zones etc.) exceeds maximum supported size of 0x10000000000 (thread T0) #0 0x7fd3efabf411 in __interceptor_calloc /usr/src/debug/gcc/libsanitizer/asan/asan_malloc_linux.cpp:77 #1 0x5563a0a1e3d3 in xcalloc wrapper.c:150 #2 0x5563a058d005 in parse_attr_line attr.c:384 #3 0x5563a058e661 in handle_attr_line attr.c:660 #4 0x5563a058eddb in read_attr_from_index attr.c:769 #5 0x5563a058ef12 in read_attr attr.c:797 #6 0x5563a058f24c in bootstrap_attr_stack attr.c:867 #7 0x5563a058f4a3 in prepare_attr_stack attr.c:902 #8 0x5563a05905da in collect_some_attrs attr.c:1097 #9 0x5563a059093d in git_all_attrs attr.c:1128 #10 0x5563a02f636e in check_attr builtin/check-attr.c:67 #11 0x5563a02f6c12 in cmd_check_attr builtin/check-attr.c:183 #12 0x5563a02aa993 in run_builtin git.c:466 #13 0x5563a02ab397 in handle_builtin git.c:721 #14 0x5563a02abb2b in run_argv git.c:788 #15 0x5563a02ac991 in cmd_main git.c:926 #16 0x5563a05432bd in main common-main.c:57 #17 0x7fd3ef82228f (/usr/lib/libc.so.6+0x2328f) ==1022==HINT: if you don't care about these errors you may set allocator_may_return_null=1 SUMMARY: AddressSanitizer: allocation-size-too-big /usr/src/debug/gcc/libsanitizer/asan/asan_malloc_linux.cpp:77 in __interceptor_calloc ==1022==ABORTING Or, much worse, it can lead to an out-of-bounds write because we underallocate and then memcpy(3P) into an array: perl -e ' print "A " . "\rh="x2000000000; print "\rh="x2000000000; print "\rh="x294967294 . "\n" ' >.gitattributes git add .gitattributes git commit -am "evil attributes" $ git clone --quiet /path/to/repo ================================================================= ==15062==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x602000002550 at pc 0x5555559884d5 bp 0x7fffffffbc60 sp 0x7fffffffbc58 WRITE of size 8 at 0x602000002550 thread T0 #0 0x5555559884d4 in parse_attr_line attr.c:393 #1 0x5555559884d4 in handle_attr_line attr.c:660 #2 0x555555988902 in read_attr_from_index attr.c:784 #3 0x555555988902 in read_attr_from_index attr.c:747 #4 0x555555988a1d in read_attr attr.c:800 #5 0x555555989b0c in bootstrap_attr_stack attr.c:882 #6 0x555555989b0c in prepare_attr_stack attr.c:917 #7 0x555555989b0c in collect_some_attrs attr.c:1112 #8 0x55555598b141 in git_check_attr attr.c:1126 #9 0x555555a13004 in convert_attrs convert.c:1311 #10 0x555555a95e04 in checkout_entry_ca entry.c:553 #11 0x555555d58bf6 in checkout_entry entry.h:42 #12 0x555555d58bf6 in check_updates unpack-trees.c:480 #13 0x555555d5eb55 in unpack_trees unpack-trees.c:2040 #14 0x555555785ab7 in checkout builtin/clone.c:724 #15 0x555555785ab7 in cmd_clone builtin/clone.c:1384 #16 0x55555572443c in run_builtin git.c:466 #17 0x55555572443c in handle_builtin git.c:721 #18 0x555555727872 in run_argv git.c:788 #19 0x555555727872 in cmd_main git.c:926 #20 0x555555721fa0 in main common-main.c:57 #21 0x7ffff73f1d09 in __libc_start_main ../csu/libc-start.c:308 #22 0x555555723f39 in _start (git+0x1cff39) 0x602000002552 is located 0 bytes to the right of 2-byte region [0x602000002550,0x602000002552) allocated by thread T0 here: #0 0x7ffff768c037 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:154 #1 0x555555d7fff7 in xcalloc wrapper.c:150 #2 0x55555598815f in parse_attr_line attr.c:384 #3 0x55555598815f in handle_attr_line attr.c:660 #4 0x555555988902 in read_attr_from_index attr.c:784 #5 0x555555988902 in read_attr_from_index attr.c:747 #6 0x555555988a1d in read_attr attr.c:800 #7 0x555555989b0c in bootstrap_attr_stack attr.c:882 #8 0x555555989b0c in prepare_attr_stack attr.c:917 #9 0x555555989b0c in collect_some_attrs attr.c:1112 #10 0x55555598b141 in git_check_attr attr.c:1126 #11 0x555555a13004 in convert_attrs convert.c:1311 #12 0x555555a95e04 in checkout_entry_ca entry.c:553 #13 0x555555d58bf6 in checkout_entry entry.h:42 #14 0x555555d58bf6 in check_updates unpack-trees.c:480 #15 0x555555d5eb55 in unpack_trees unpack-trees.c:2040 #16 0x555555785ab7 in checkout builtin/clone.c:724 #17 0x555555785ab7 in cmd_clone builtin/clone.c:1384 #18 0x55555572443c in run_builtin git.c:466 #19 0x55555572443c in handle_builtin git.c:721 #20 0x555555727872 in run_argv git.c:788 #21 0x555555727872 in cmd_main git.c:926 #22 0x555555721fa0 in main common-main.c:57 #23 0x7ffff73f1d09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: heap-buffer-overflow attr.c:393 in parse_attr_line Shadow bytes around the buggy address: 0x0c047fff8450: fa fa 00 02 fa fa 00 07 fa fa fd fd fa fa 00 00 0x0c047fff8460: fa fa 02 fa fa fa fd fd fa fa 00 06 fa fa 05 fa 0x0c047fff8470: fa fa fd fd fa fa 00 02 fa fa 06 fa fa fa 05 fa 0x0c047fff8480: fa fa 07 fa fa fa fd fd fa fa 00 01 fa fa 00 02 0x0c047fff8490: fa fa 00 03 fa fa 00 fa fa fa 00 01 fa fa 00 03 =>0x0c047fff84a0: fa fa 00 01 fa fa 00 02 fa fa[02]fa fa fa fa fa 0x0c047fff84b0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff84c0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff84d0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff84e0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff84f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 Container overflow: fc Array cookie: ac Intra object redzone: bb ASan internal: fe Left alloca redzone: ca Right alloca redzone: cb Shadow gap: cc ==15062==ABORTING Fix this bug by using `size_t` instead to count the number of attributes so that this value cannot reasonably overflow without running out of memory before already. Reported-by: Markus Vervier Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/attr.c b/attr.c index 4a10ba4d94a..525f6da2013 100644 --- a/attr.c +++ b/attr.c @@ -272,7 +272,7 @@ struct match_attr { const struct git_attr *attr; } u; char is_macro; - unsigned num_attr; + size_t num_attr; struct attr_state state[FLEX_ARRAY]; }; @@ -333,8 +333,7 @@ static const char *parse_attr(const char *src, int lineno, const char *cp, static struct match_attr *parse_attr_line(const char *line, const char *src, int lineno, int macro_ok) { - size_t namelen; - int num_attr, i; + size_t namelen, num_attr, i; const char *cp, *name, *states; struct match_attr *res = NULL; int is_macro; @@ -451,7 +450,8 @@ static void attr_stack_free(struct attr_stack *e) free(e->origin); for (i = 0; i < e->num_matches; i++) { struct match_attr *a = e->attrs[i]; - int j; + size_t j; + for (j = 0; j < a->num_attr; j++) { const char *setto = a->state[j].setto; if (setto == ATTR__TRUE || @@ -1001,12 +1001,12 @@ static int macroexpand_one(struct all_attrs_item *all_attrs, int nr, int rem); static int fill_one(const char *what, struct all_attrs_item *all_attrs, const struct match_attr *a, int rem) { - int i; + size_t i; - for (i = a->num_attr - 1; rem > 0 && i >= 0; i--) { - const struct git_attr *attr = a->state[i].attr; + for (i = a->num_attr; rem > 0 && i > 0; i--) { + const struct git_attr *attr = a->state[i - 1].attr; const char **n = &(all_attrs[attr->attr_nr].value); - const char *v = a->state[i].setto; + const char *v = a->state[i - 1].setto; if (*n == ATTR__UNKNOWN) { debug_set(what, From 447ac906e189535e77dcb1f4bbe3f1bc917d4c12 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:31 +0100 Subject: [PATCH 05/30] attr: fix out-of-bounds read with unreasonable amount of patterns The `struct attr_stack` tracks the stack of all patterns together with their attributes. When parsing a gitattributes file that has more than 2^31 such patterns though we may trigger multiple out-of-bounds reads on 64 bit platforms. This is because while the `num_matches` variable is an unsigned integer, we always use a signed integer to iterate over them. I have not been able to reproduce this issue due to memory constraints on my systems. But despite the out-of-bounds reads, the worst thing that can seemingly happen is to call free(3P) with a garbage pointer when calling `attr_stack_free()`. Fix this bug by using unsigned integers to iterate over the array. While this makes the iteration somewhat awkward when iterating in reverse, it is at least better than knowingly running into an out-of-bounds read. While at it, convert the call to `ALLOC_GROW` to use `ALLOC_GROW_BY` instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/attr.c b/attr.c index 525f6da2013..98c231d6758 100644 --- a/attr.c +++ b/attr.c @@ -446,7 +446,7 @@ struct attr_stack { static void attr_stack_free(struct attr_stack *e) { - int i; + unsigned i; free(e->origin); for (i = 0; i < e->num_matches; i++) { struct match_attr *a = e->attrs[i]; @@ -660,8 +660,8 @@ static void handle_attr_line(struct attr_stack *res, a = parse_attr_line(line, src, lineno, macro_ok); if (!a) return; - ALLOC_GROW(res->attrs, res->num_matches + 1, res->alloc); - res->attrs[res->num_matches++] = a; + ALLOC_GROW_BY(res->attrs, res->num_matches, 1, res->alloc); + res->attrs[res->num_matches - 1] = a; } static struct attr_stack *read_attr_from_array(const char **list) @@ -1025,11 +1025,11 @@ static int fill(const char *path, int pathlen, int basename_offset, struct all_attrs_item *all_attrs, int rem) { for (; rem > 0 && stack; stack = stack->prev) { - int i; + unsigned i; const char *base = stack->origin ? stack->origin : ""; - for (i = stack->num_matches - 1; 0 < rem && 0 <= i; i--) { - const struct match_attr *a = stack->attrs[i]; + for (i = stack->num_matches; 0 < rem && 0 < i; i--) { + const struct match_attr *a = stack->attrs[i - 1]; if (a->is_macro) continue; if (path_matches(path, pathlen, basename_offset, @@ -1060,9 +1060,9 @@ static void determine_macros(struct all_attrs_item *all_attrs, const struct attr_stack *stack) { for (; stack; stack = stack->prev) { - int i; - for (i = stack->num_matches - 1; i >= 0; i--) { - const struct match_attr *ma = stack->attrs[i]; + unsigned i; + for (i = stack->num_matches; i > 0; i--) { + const struct match_attr *ma = stack->attrs[i - 1]; if (ma->is_macro) { int n = ma->u.attr->attr_nr; if (!all_attrs[n].macro) { From e1e12e97ac73ded85f7d000da1063a774b3cc14f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:36 +0100 Subject: [PATCH 06/30] attr: fix integer overflow with more than INT_MAX macros Attributes have a field that tracks the position in the `all_attrs` array they're stored inside. This field gets set via `hashmap_get_size` when adding the attribute to the global map of attributes. But while the field is of type `int`, the value returned by `hashmap_get_size` is an `unsigned int`. It can thus happen that the value overflows, where we would now dereference teh `all_attrs` array at an out-of-bounds value. We do have a sanity check for this overflow via an assert that verifies the index matches the new hashmap's size. But asserts are not a proper mechanism to detect against any such overflows as they may not in fact be compiled into production code. Fix this by using an `unsigned int` to track the index and convert the assert to a call `die()`. Reported-by: Jeff King Signed-off-by: Junio C Hamano --- attr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/attr.c b/attr.c index 98c231d6758..d1faf69083a 100644 --- a/attr.c +++ b/attr.c @@ -28,7 +28,7 @@ static const char git_attr__unknown[] = "(builtin)unknown"; #endif struct git_attr { - int attr_nr; /* unique attribute number */ + unsigned int attr_nr; /* unique attribute number */ char name[FLEX_ARRAY]; /* attribute name */ }; @@ -226,8 +226,8 @@ static const struct git_attr *git_attr_internal(const char *name, size_t namelen a->attr_nr = hashmap_get_size(&g_attr_hashmap.map); attr_hashmap_add(&g_attr_hashmap, a->name, namelen, a); - assert(a->attr_nr == - (hashmap_get_size(&g_attr_hashmap.map) - 1)); + if (a->attr_nr != hashmap_get_size(&g_attr_hashmap.map) - 1) + die(_("unable to add additional attribute")); } hashmap_unlock(&g_attr_hashmap); @@ -1064,7 +1064,7 @@ static void determine_macros(struct all_attrs_item *all_attrs, for (i = stack->num_matches; i > 0; i--) { const struct match_attr *ma = stack->attrs[i - 1]; if (ma->is_macro) { - int n = ma->u.attr->attr_nr; + unsigned int n = ma->u.attr->attr_nr; if (!all_attrs[n].macro) { all_attrs[n].macro = ma; } @@ -1116,7 +1116,7 @@ void git_check_attr(const struct index_state *istate, collect_some_attrs(istate, path, check); for (i = 0; i < check->nr; i++) { - size_t n = check->items[i].attr->attr_nr; + unsigned int n = check->items[i].attr->attr_nr; const char *value = check->all_attrs[n].value; if (value == ATTR__UNKNOWN) value = ATTR__UNSET; From a60a66e409c265b2944f18bf43581c146812586d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:40 +0100 Subject: [PATCH 07/30] attr: harden allocation against integer overflows When parsing an attributes line, we need to allocate an array that holds all attributes specified for the given file pattern. The calculation to determine the number of bytes that need to be allocated was prone to an overflow though when there was an unreasonable amount of attributes. Harden the allocation by instead using the `st_` helper functions that cause us to die when we hit an integer overflow. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/attr.c b/attr.c index d1faf69083a..a9f7063cfc9 100644 --- a/attr.c +++ b/attr.c @@ -380,10 +380,9 @@ static struct match_attr *parse_attr_line(const char *line, const char *src, goto fail_return; } - res = xcalloc(1, - sizeof(*res) + - sizeof(struct attr_state) * num_attr + - (is_macro ? 0 : namelen + 1)); + res = xcalloc(1, st_add3(sizeof(*res), + st_mult(sizeof(struct attr_state), num_attr), + is_macro ? 0 : namelen + 1)); if (is_macro) { res->u.attr = git_attr_internal(name, namelen); } else { From d74b1fd54fdbc45966d12ea907dece11e072fb2b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:44 +0100 Subject: [PATCH 08/30] attr: fix silently splitting up lines longer than 2048 bytes When reading attributes from a file we use fgets(3P) with a buffer size of 2048 bytes. This means that as soon as a line exceeds the buffer size we split it up into multiple parts and parse each of them as a separate pattern line. This is of course not what the user intended, and even worse the behaviour is inconsistent with how we read attributes from the index. Fix this bug by converting the code to use `strbuf_getline()` instead. This will indeed read in the whole line, which may theoretically lead to an out-of-memory situation when the gitattributes file is huge. We're about to reject any gitattributes files larger than 100MB in the next commit though, which makes this less of a concern. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 13 +++++++------ t/t0003-attributes.sh | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/attr.c b/attr.c index a9f7063cfc9..41657479fff 100644 --- a/attr.c +++ b/attr.c @@ -699,21 +699,22 @@ void git_attr_set_direction(enum git_attr_direction new_direction) static struct attr_stack *read_attr_from_file(const char *path, int macro_ok) { + struct strbuf buf = STRBUF_INIT; FILE *fp = fopen_or_warn(path, "r"); struct attr_stack *res; - char buf[2048]; int lineno = 0; if (!fp) return NULL; res = xcalloc(1, sizeof(*res)); - while (fgets(buf, sizeof(buf), fp)) { - char *bufp = buf; - if (!lineno) - skip_utf8_bom(&bufp, strlen(bufp)); - handle_attr_line(res, bufp, path, ++lineno, macro_ok); + while (strbuf_getline(&buf, fp) != EOF) { + if (!lineno && starts_with(buf.buf, utf8_bom)) + strbuf_remove(&buf, 0, strlen(utf8_bom)); + handle_attr_line(res, buf.buf, path, ++lineno, macro_ok); } + fclose(fp); + strbuf_release(&buf); return res; } diff --git a/t/t0003-attributes.sh b/t/t0003-attributes.sh index b660593c20f..416386ce2f8 100755 --- a/t/t0003-attributes.sh +++ b/t/t0003-attributes.sh @@ -339,4 +339,25 @@ test_expect_success 'query binary macro directly' ' test_cmp expect actual ' +test_expect_success 'large attributes line ignores trailing content in tree' ' + test_when_finished "rm .gitattributes" && + # older versions of Git broke lines at 2048 bytes; the 2045 bytes + # of 0-padding here is accounting for the three bytes of "a 1", which + # would knock "trailing" to the "next" line, where it would be + # erroneously parsed. + printf "a %02045dtrailing attribute\n" 1 >.gitattributes && + git check-attr --all trailing >actual 2>err && + test_must_be_empty err && + test_must_be_empty actual +' + +test_expect_success 'large attributes line ignores trailing content in index' ' + test_when_finished "git update-index --remove .gitattributes" && + blob=$(printf "a %02045dtrailing attribute\n" 1 | git hash-object -w --stdin) && + git update-index --add --cacheinfo 100644,$blob,.gitattributes && + git check-attr --cached --all trailing >actual 2>err && + test_must_be_empty err && + test_must_be_empty actual +' + test_done From dfa6b32b5e599d97448337ed4fc18dd50c90758f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:48 +0100 Subject: [PATCH 09/30] attr: ignore attribute lines exceeding 2048 bytes There are two different code paths to read gitattributes: once via a file, and once via the index. These two paths used to behave differently because when reading attributes from a file, we used fgets(3P) with a buffer size of 2kB. Consequentially, we silently truncate line lengths when lines are longer than that and will then parse the remainder of the line as a new pattern. It goes without saying that this is entirely unexpected, but it's even worse that the behaviour depends on how the gitattributes are parsed. While this is simply wrong, the silent truncation saves us with the recently discovered vulnerabilities that can cause out-of-bound writes or reads with unreasonably long lines due to integer overflows. As the common path is to read gitattributes via the worktree file instead of via the index, we can assume that any gitattributes file that had lines longer than that is already broken anyway. So instead of lifting the limit here, we can double down on it to fix the vulnerabilities. Introduce an explicit line length limit of 2kB that is shared across all paths that read attributes and ignore any line that hits this limit while printing a warning. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 5 +++++ attr.h | 6 ++++++ t/t0003-attributes.sh | 25 +++++++++++++++++++++++-- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/attr.c b/attr.c index 41657479fff..38ecd2fff30 100644 --- a/attr.c +++ b/attr.c @@ -344,6 +344,11 @@ static struct match_attr *parse_attr_line(const char *line, const char *src, return NULL; name = cp; + if (strlen(line) >= ATTR_MAX_LINE_LENGTH) { + warning(_("ignoring overly long attributes line %d"), lineno); + return NULL; + } + if (*cp == '"' && !unquote_c_style(&pattern, name, &states)) { name = pattern.buf; namelen = pattern.len; diff --git a/attr.h b/attr.h index 404548f028a..df9a75da550 100644 --- a/attr.h +++ b/attr.h @@ -107,6 +107,12 @@ * - Free the `attr_check` struct by calling `attr_check_free()`. */ +/** + * The maximum line length for a gitattributes file. If the line exceeds this + * length we will ignore it. + */ +#define ATTR_MAX_LINE_LENGTH 2048 + struct index_state; /** diff --git a/t/t0003-attributes.sh b/t/t0003-attributes.sh index 416386ce2f8..7d68e6a56e9 100755 --- a/t/t0003-attributes.sh +++ b/t/t0003-attributes.sh @@ -339,6 +339,15 @@ test_expect_success 'query binary macro directly' ' test_cmp expect actual ' +test_expect_success 'large attributes line ignored in tree' ' + test_when_finished "rm .gitattributes" && + printf "path %02043d" 1 >.gitattributes && + git check-attr --all path >actual 2>err && + echo "warning: ignoring overly long attributes line 1" >expect && + test_cmp expect err && + test_must_be_empty actual +' + test_expect_success 'large attributes line ignores trailing content in tree' ' test_when_finished "rm .gitattributes" && # older versions of Git broke lines at 2048 bytes; the 2045 bytes @@ -347,7 +356,18 @@ test_expect_success 'large attributes line ignores trailing content in tree' ' # erroneously parsed. printf "a %02045dtrailing attribute\n" 1 >.gitattributes && git check-attr --all trailing >actual 2>err && - test_must_be_empty err && + echo "warning: ignoring overly long attributes line 1" >expect && + test_cmp expect err && + test_must_be_empty actual +' + +test_expect_success 'large attributes line ignored in index' ' + test_when_finished "git update-index --remove .gitattributes" && + blob=$(printf "path %02043d" 1 | git hash-object -w --stdin) && + git update-index --add --cacheinfo 100644,$blob,.gitattributes && + git check-attr --cached --all path >actual 2>err && + echo "warning: ignoring overly long attributes line 1" >expect && + test_cmp expect err && test_must_be_empty actual ' @@ -356,7 +376,8 @@ test_expect_success 'large attributes line ignores trailing content in index' ' blob=$(printf "a %02045dtrailing attribute\n" 1 | git hash-object -w --stdin) && git update-index --add --cacheinfo 100644,$blob,.gitattributes && git check-attr --cached --all trailing >actual 2>err && - test_must_be_empty err && + echo "warning: ignoring overly long attributes line 1" >expect && + test_cmp expect err && test_must_be_empty actual ' From 3c50032ff5289cc45659f21949c8d09e52164579 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:53 +0100 Subject: [PATCH 10/30] attr: ignore overly large gitattributes files Similar as with the preceding commit, start ignoring gitattributes files that are overly large to protect us against out-of-bounds reads and writes caused by integer overflows. Unfortunately, we cannot just define "overly large" in terms of any preexisting limits in the codebase. Instead, we choose a very conservative limit of 100MB. This is plenty of room for specifying gitattributes, and incidentally it is also the limit for blob sizes for GitHub. While we don't want GitHub to dictate limits here, it is still sensible to use this fact for an informed decision given that it is hosting a huge set of repositories. Furthermore, over at GitLab we scanned a subset of repositories for their root-level attribute files. We found that 80% of them have a gitattributes file smaller than 100kB, 99.99% have one smaller than 1MB, and only a single repository had one that was almost 3MB in size. So enforcing a limit of 100MB seems to give us ample of headroom. With this limit in place we can be reasonably sure that there is no easy way to exploit the gitattributes file via integer overflows anymore. Furthermore, it protects us against resource exhaustion caused by allocating the in-memory data structures required to represent the parsed attributes. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- attr.c | 24 ++++++++++++++++++++++-- attr.h | 6 ++++++ t/t0003-attributes.sh | 17 +++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/attr.c b/attr.c index 38ecd2fff30..f9316d14baf 100644 --- a/attr.c +++ b/attr.c @@ -708,10 +708,25 @@ static struct attr_stack *read_attr_from_file(const char *path, int macro_ok) FILE *fp = fopen_or_warn(path, "r"); struct attr_stack *res; int lineno = 0; + int fd; + struct stat st; if (!fp) return NULL; - res = xcalloc(1, sizeof(*res)); + + fd = fileno(fp); + if (fstat(fd, &st)) { + warning_errno(_("cannot fstat gitattributes file '%s'"), path); + fclose(fp); + return NULL; + } + if (st.st_size >= ATTR_MAX_FILE_SIZE) { + warning(_("ignoring overly large gitattributes file '%s'"), path); + fclose(fp); + return NULL; + } + + CALLOC_ARRAY(res, 1); while (strbuf_getline(&buf, fp) != EOF) { if (!lineno && starts_with(buf.buf, utf8_bom)) strbuf_remove(&buf, 0, strlen(utf8_bom)); @@ -730,13 +745,18 @@ static struct attr_stack *read_attr_from_index(const struct index_state *istate, struct attr_stack *res; char *buf, *sp; int lineno = 0; + size_t size; if (!istate) return NULL; - buf = read_blob_data_from_index(istate, path, NULL); + buf = read_blob_data_from_index(istate, path, &size); if (!buf) return NULL; + if (size >= ATTR_MAX_FILE_SIZE) { + warning(_("ignoring overly large gitattributes blob '%s'"), path); + return NULL; + } res = xcalloc(1, sizeof(*res)); for (sp = buf; *sp; ) { diff --git a/attr.h b/attr.h index df9a75da550..5970f930fd0 100644 --- a/attr.h +++ b/attr.h @@ -113,6 +113,12 @@ */ #define ATTR_MAX_LINE_LENGTH 2048 + /** + * The maximum size of the giattributes file. If the file exceeds this size we + * will ignore it. + */ +#define ATTR_MAX_FILE_SIZE (100 * 1024 * 1024) + struct index_state; /** diff --git a/t/t0003-attributes.sh b/t/t0003-attributes.sh index 7d68e6a56e9..9d9aa2855d2 100755 --- a/t/t0003-attributes.sh +++ b/t/t0003-attributes.sh @@ -361,6 +361,14 @@ test_expect_success 'large attributes line ignores trailing content in tree' ' test_must_be_empty actual ' +test_expect_success EXPENSIVE 'large attributes file ignored in tree' ' + test_when_finished "rm .gitattributes" && + dd if=/dev/zero of=.gitattributes bs=101M count=1 2>/dev/null && + git check-attr --all path >/dev/null 2>err && + echo "warning: ignoring overly large gitattributes file ${SQ}.gitattributes${SQ}" >expect && + test_cmp expect err +' + test_expect_success 'large attributes line ignored in index' ' test_when_finished "git update-index --remove .gitattributes" && blob=$(printf "path %02043d" 1 | git hash-object -w --stdin) && @@ -381,4 +389,13 @@ test_expect_success 'large attributes line ignores trailing content in index' ' test_must_be_empty actual ' +test_expect_success EXPENSIVE 'large attributes file ignored in index' ' + test_when_finished "git update-index --remove .gitattributes" && + blob=$(dd if=/dev/zero bs=101M count=1 2>/dev/null | git hash-object -w --stdin) && + git update-index --add --cacheinfo 100644,$blob,.gitattributes && + git check-attr --cached --all path >/dev/null 2>err && + echo "warning: ignoring overly large gitattributes blob ${SQ}.gitattributes${SQ}" >expect && + test_cmp expect err +' + test_done From a244dc5b0a629290881641467c7a545de7508ab2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Tue, 2 Nov 2021 15:46:06 +0000 Subject: [PATCH 11/30] test-lib: add prerequisite for 64-bit platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow tests that assume a 64-bit `size_t` to be skipped in 32-bit platforms and regardless of the size of `long`. This imitates the `LONG_IS_64BIT` prerequisite. Signed-off-by: Carlo Marcelo Arenas Belón Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/test-lib.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/t/test-lib.sh b/t/test-lib.sh index 9fa7c1d0f6d..7d6e0f89d13 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1686,6 +1686,10 @@ build_option () { sed -ne "s/^$1: //p" } +test_lazy_prereq SIZE_T_IS_64BIT ' + test 8 -eq "$(build_option sizeof-size_t)" +' + test_lazy_prereq LONG_IS_64BIT ' test 8 -le "$(build_option sizeof-long)" ' From 81dc898df9b4b4035534a927f3234a3839b698bf Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:46:25 +0100 Subject: [PATCH 12/30] pretty: fix out-of-bounds write caused by integer overflow When using a padding specifier in the pretty format passed to git-log(1) we need to calculate the string length in several places. These string lengths are stored in `int`s though, which means that these can easily overflow when the input lengths exceeds 2GB. This can ultimately lead to an out-of-bounds write when these are used in a call to memcpy(3P): ==8340==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x7f1ec62f97fe at pc 0x7f2127e5f427 bp 0x7ffd3bd63de0 sp 0x7ffd3bd63588 WRITE of size 1 at 0x7f1ec62f97fe thread T0 #0 0x7f2127e5f426 in __interceptor_memcpy /usr/src/debug/gcc/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:827 #1 0x5628e96aa605 in format_and_pad_commit pretty.c:1762 #2 0x5628e96aa7f4 in format_commit_item pretty.c:1801 #3 0x5628e97cdb24 in strbuf_expand strbuf.c:429 #4 0x5628e96ab060 in repo_format_commit_message pretty.c:1869 #5 0x5628e96acd0f in pretty_print_commit pretty.c:2161 #6 0x5628e95a44c8 in show_log log-tree.c:781 #7 0x5628e95a76ba in log_tree_commit log-tree.c:1117 #8 0x5628e922bed5 in cmd_log_walk_no_free builtin/log.c:508 #9 0x5628e922c35b in cmd_log_walk builtin/log.c:549 #10 0x5628e922f1a2 in cmd_log builtin/log.c:883 #11 0x5628e9106993 in run_builtin git.c:466 #12 0x5628e9107397 in handle_builtin git.c:721 #13 0x5628e9107b07 in run_argv git.c:788 #14 0x5628e91088a7 in cmd_main git.c:923 #15 0x5628e939d682 in main common-main.c:57 #16 0x7f2127c3c28f (/usr/lib/libc.so.6+0x2328f) #17 0x7f2127c3c349 in __libc_start_main (/usr/lib/libc.so.6+0x23349) #18 0x5628e91020e4 in _start ../sysdeps/x86_64/start.S:115 0x7f1ec62f97fe is located 2 bytes to the left of 4831838265-byte region [0x7f1ec62f9800,0x7f1fe62f9839) allocated by thread T0 here: #0 0x7f2127ebe7ea in __interceptor_realloc /usr/src/debug/gcc/libsanitizer/asan/asan_malloc_linux.cpp:85 #1 0x5628e98774d4 in xrealloc wrapper.c:136 #2 0x5628e97cb01c in strbuf_grow strbuf.c:99 #3 0x5628e97ccd42 in strbuf_addchars strbuf.c:327 #4 0x5628e96aa55c in format_and_pad_commit pretty.c:1761 #5 0x5628e96aa7f4 in format_commit_item pretty.c:1801 #6 0x5628e97cdb24 in strbuf_expand strbuf.c:429 #7 0x5628e96ab060 in repo_format_commit_message pretty.c:1869 #8 0x5628e96acd0f in pretty_print_commit pretty.c:2161 #9 0x5628e95a44c8 in show_log log-tree.c:781 #10 0x5628e95a76ba in log_tree_commit log-tree.c:1117 #11 0x5628e922bed5 in cmd_log_walk_no_free builtin/log.c:508 #12 0x5628e922c35b in cmd_log_walk builtin/log.c:549 #13 0x5628e922f1a2 in cmd_log builtin/log.c:883 #14 0x5628e9106993 in run_builtin git.c:466 #15 0x5628e9107397 in handle_builtin git.c:721 #16 0x5628e9107b07 in run_argv git.c:788 #17 0x5628e91088a7 in cmd_main git.c:923 #18 0x5628e939d682 in main common-main.c:57 #19 0x7f2127c3c28f (/usr/lib/libc.so.6+0x2328f) #20 0x7f2127c3c349 in __libc_start_main (/usr/lib/libc.so.6+0x23349) #21 0x5628e91020e4 in _start ../sysdeps/x86_64/start.S:115 SUMMARY: AddressSanitizer: heap-buffer-overflow /usr/src/debug/gcc/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:827 in __interceptor_memcpy Shadow bytes around the buggy address: 0x0fe458c572a0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0fe458c572b0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0fe458c572c0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0fe458c572d0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0fe458c572e0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa =>0x0fe458c572f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa[fa] 0x0fe458c57300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0fe458c57310: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0fe458c57320: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0fe458c57330: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0fe458c57340: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 Container overflow: fc Array cookie: ac Intra object redzone: bb ASan internal: fe Left alloca redzone: ca Right alloca redzone: cb ==8340==ABORTING The pretty format can also be used in `git archive` operations via the `export-subst` attribute. So this is what in our opinion makes this a critical issue in the context of Git forges which allow to download an archive of user supplied Git repositories. Fix this vulnerability by using `size_t` instead of `int` to track the string lengths. Add tests which detect this vulnerability when Git is compiled with the address sanitizer. Reported-by: Joern Schneeweisz Original-patch-by: Joern Schneeweisz Modified-by: Taylor Blau Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- pretty.c | 11 ++++++----- t/t4205-log-pretty-formats.sh | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/pretty.c b/pretty.c index 7a7708a0ea7..a1a01492c19 100644 --- a/pretty.c +++ b/pretty.c @@ -1473,7 +1473,9 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */ struct format_commit_context *c) { struct strbuf local_sb = STRBUF_INIT; - int total_consumed = 0, len, padding = c->padding; + size_t total_consumed = 0; + int len, padding = c->padding; + if (padding < 0) { const char *start = strrchr(sb->buf, '\n'); int occupied; @@ -1485,7 +1487,7 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */ } while (1) { int modifier = *placeholder == 'C'; - int consumed = format_commit_one(&local_sb, placeholder, c); + size_t consumed = format_commit_one(&local_sb, placeholder, c); total_consumed += consumed; if (!modifier) @@ -1551,7 +1553,7 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */ } strbuf_addbuf(sb, &local_sb); } else { - int sb_len = sb->len, offset = 0; + size_t sb_len = sb->len, offset = 0; if (c->flush_type == flush_left) offset = padding - len; else if (c->flush_type == flush_both) @@ -1574,8 +1576,7 @@ static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */ const char *placeholder, void *context) { - int consumed; - size_t orig_len; + size_t consumed, orig_len; enum { NO_MAGIC, ADD_LF_BEFORE_NON_EMPTY, diff --git a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh index 204c149d5a4..fff3e05615e 100755 --- a/t/t4205-log-pretty-formats.sh +++ b/t/t4205-log-pretty-formats.sh @@ -867,4 +867,21 @@ test_expect_success 'log --pretty=reference is colored appropriately' ' test_cmp expect actual ' +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' ' + # We only assert that this command does not crash. This needs to be + # executed with the address sanitizer to demonstrate failure. + git log -1 --pretty="format:%>(2147483646)%x41%41%>(2147483646)%x41" >/dev/null +' + +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'set up huge commit' ' + test-tool genzeros 2147483649 | tr "\000" "1" >expect && + huge_commit=$(git commit-tree -F expect HEAD^{tree}) +' + +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' ' + git log -1 --format="%B%<(1)%x30" $huge_commit >actual && + echo 0 >>expect && + test_cmp expect actual +' + test_done From b49f309aa16febeddb65e82526640a91bbba3be3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:46:30 +0100 Subject: [PATCH 13/30] pretty: fix out-of-bounds read when left-flushing with stealing With the `%>>()` pretty formatter, you can ask git-log(1) et al to steal spaces. To do so we need to look ahead of the next token to see whether there are spaces there. This loop takes into account ANSI sequences that end with an `m`, and if it finds any it will skip them until it finds the first space. While doing so it does not take into account the buffer's limits though and easily does an out-of-bounds read. Add a test that hits this behaviour. While we don't have an easy way to verify this, the test causes the following failure when run with `SANITIZE=address`: ==37941==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x603000000baf at pc 0x55ba6f88e0d0 bp 0x7ffc84c50d20 sp 0x7ffc84c50d10 READ of size 1 at 0x603000000baf thread T0 #0 0x55ba6f88e0cf in format_and_pad_commit pretty.c:1712 #1 0x55ba6f88e7b4 in format_commit_item pretty.c:1801 #2 0x55ba6f9b1ae4 in strbuf_expand strbuf.c:429 #3 0x55ba6f88f020 in repo_format_commit_message pretty.c:1869 #4 0x55ba6f890ccf in pretty_print_commit pretty.c:2161 #5 0x55ba6f7884c8 in show_log log-tree.c:781 #6 0x55ba6f78b6ba in log_tree_commit log-tree.c:1117 #7 0x55ba6f40fed5 in cmd_log_walk_no_free builtin/log.c:508 #8 0x55ba6f41035b in cmd_log_walk builtin/log.c:549 #9 0x55ba6f4131a2 in cmd_log builtin/log.c:883 #10 0x55ba6f2ea993 in run_builtin git.c:466 #11 0x55ba6f2eb397 in handle_builtin git.c:721 #12 0x55ba6f2ebb07 in run_argv git.c:788 #13 0x55ba6f2ec8a7 in cmd_main git.c:923 #14 0x55ba6f581682 in main common-main.c:57 #15 0x7f2d08c3c28f (/usr/lib/libc.so.6+0x2328f) #16 0x7f2d08c3c349 in __libc_start_main (/usr/lib/libc.so.6+0x23349) #17 0x55ba6f2e60e4 in _start ../sysdeps/x86_64/start.S:115 0x603000000baf is located 1 bytes to the left of 24-byte region [0x603000000bb0,0x603000000bc8) allocated by thread T0 here: #0 0x7f2d08ebe7ea in __interceptor_realloc /usr/src/debug/gcc/libsanitizer/asan/asan_malloc_linux.cpp:85 #1 0x55ba6fa5b494 in xrealloc wrapper.c:136 #2 0x55ba6f9aefdc in strbuf_grow strbuf.c:99 #3 0x55ba6f9b0a06 in strbuf_add strbuf.c:298 #4 0x55ba6f9b1a25 in strbuf_expand strbuf.c:418 #5 0x55ba6f88f020 in repo_format_commit_message pretty.c:1869 #6 0x55ba6f890ccf in pretty_print_commit pretty.c:2161 #7 0x55ba6f7884c8 in show_log log-tree.c:781 #8 0x55ba6f78b6ba in log_tree_commit log-tree.c:1117 #9 0x55ba6f40fed5 in cmd_log_walk_no_free builtin/log.c:508 #10 0x55ba6f41035b in cmd_log_walk builtin/log.c:549 #11 0x55ba6f4131a2 in cmd_log builtin/log.c:883 #12 0x55ba6f2ea993 in run_builtin git.c:466 #13 0x55ba6f2eb397 in handle_builtin git.c:721 #14 0x55ba6f2ebb07 in run_argv git.c:788 #15 0x55ba6f2ec8a7 in cmd_main git.c:923 #16 0x55ba6f581682 in main common-main.c:57 #17 0x7f2d08c3c28f (/usr/lib/libc.so.6+0x2328f) #18 0x7f2d08c3c349 in __libc_start_main (/usr/lib/libc.so.6+0x23349) #19 0x55ba6f2e60e4 in _start ../sysdeps/x86_64/start.S:115 SUMMARY: AddressSanitizer: heap-buffer-overflow pretty.c:1712 in format_and_pad_commit Shadow bytes around the buggy address: 0x0c067fff8120: fa fa fd fd fd fa fa fa fd fd fd fa fa fa fd fd 0x0c067fff8130: fd fd fa fa fd fd fd fd fa fa fd fd fd fa fa fa 0x0c067fff8140: fd fd fd fa fa fa fd fd fd fa fa fa fd fd fd fa 0x0c067fff8150: fa fa fd fd fd fd fa fa 00 00 00 fa fa fa fd fd 0x0c067fff8160: fd fa fa fa fd fd fd fa fa fa fd fd fd fa fa fa =>0x0c067fff8170: fd fd fd fa fa[fa]00 00 00 fa fa fa 00 00 00 fa 0x0c067fff8180: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c067fff8190: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c067fff81a0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c067fff81b0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c067fff81c0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 Container overflow: fc Array cookie: ac Intra object redzone: bb ASan internal: fe Left alloca redzone: ca Right alloca redzone: cb Luckily enough, this would only cause us to copy the out-of-bounds data into the formatted commit in case we really had an ANSI sequence preceding our buffer. So this bug likely has no security consequences. Fix it regardless by not traversing past the buffer's start. Reported-by: Patrick Steinhardt Reported-by: Eric Sesterhenn Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- pretty.c | 2 +- t/t4205-log-pretty-formats.sh | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pretty.c b/pretty.c index a1a01492c19..692a6382a11 100644 --- a/pretty.c +++ b/pretty.c @@ -1514,7 +1514,7 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */ if (*ch != 'm') break; p = ch - 1; - while (ch - p < 10 && *p != '\033') + while (p > sb->buf && ch - p < 10 && *p != '\033') p--; if (*p != '\033' || ch + 1 - p != display_mode_esc_sequence_len(p)) diff --git a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh index fff3e05615e..126dc20f23c 100755 --- a/t/t4205-log-pretty-formats.sh +++ b/t/t4205-log-pretty-formats.sh @@ -867,6 +867,12 @@ test_expect_success 'log --pretty=reference is colored appropriately' ' test_cmp expect actual ' +test_expect_success 'log --pretty with space stealing' ' + printf mm0 >expect && + git log -1 --pretty="format:mm%>>|(1)%x30" >actual && + test_cmp expect actual +' + test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' ' # We only assert that this command does not crash. This needs to be # executed with the address sanitizer to demonstrate failure. From f6e0b9f38987ad5e47bab551f8760b70689a5905 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:46:34 +0100 Subject: [PATCH 14/30] pretty: fix out-of-bounds read when parsing invalid padding format An out-of-bounds read can be triggered when parsing an incomplete padding format string passed via `--pretty=format` or in Git archives when files are marked with the `export-subst` gitattribute. This bug exists since we have introduced support for truncating output via the `trunc` keyword a7f01c6b4d (pretty: support truncating in %>, %< and %><, 2013-04-19). Before this commit, we used to find the end of the formatting string by using strchr(3P). This function returns a `NULL` pointer in case the character in question wasn't found. The subsequent check whether any character was found thus simply checked the returned pointer. After the commit we switched to strcspn(3P) though, which only returns the offset to the first found character or to the trailing NUL byte. As the end pointer is now computed by adding the offset to the start pointer it won't be `NULL` anymore, and as a consequence the check doesn't do anything anymore. The out-of-bounds data that is being read can in fact end up in the formatted string. As a consequence, it is possible to leak memory contents either by calling git-log(1) or via git-archive(1) when any of the archived files is marked with the `export-subst` gitattribute. ==10888==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x602000000398 at pc 0x7f0356047cb2 bp 0x7fff3ffb95d0 sp 0x7fff3ffb8d78 READ of size 1 at 0x602000000398 thread T0 #0 0x7f0356047cb1 in __interceptor_strchrnul /usr/src/debug/gcc/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:725 #1 0x563b7cec9a43 in strbuf_expand strbuf.c:417 #2 0x563b7cda7060 in repo_format_commit_message pretty.c:1869 #3 0x563b7cda8d0f in pretty_print_commit pretty.c:2161 #4 0x563b7cca04c8 in show_log log-tree.c:781 #5 0x563b7cca36ba in log_tree_commit log-tree.c:1117 #6 0x563b7c927ed5 in cmd_log_walk_no_free builtin/log.c:508 #7 0x563b7c92835b in cmd_log_walk builtin/log.c:549 #8 0x563b7c92b1a2 in cmd_log builtin/log.c:883 #9 0x563b7c802993 in run_builtin git.c:466 #10 0x563b7c803397 in handle_builtin git.c:721 #11 0x563b7c803b07 in run_argv git.c:788 #12 0x563b7c8048a7 in cmd_main git.c:923 #13 0x563b7ca99682 in main common-main.c:57 #14 0x7f0355e3c28f (/usr/lib/libc.so.6+0x2328f) #15 0x7f0355e3c349 in __libc_start_main (/usr/lib/libc.so.6+0x23349) #16 0x563b7c7fe0e4 in _start ../sysdeps/x86_64/start.S:115 0x602000000398 is located 0 bytes to the right of 8-byte region [0x602000000390,0x602000000398) allocated by thread T0 here: #0 0x7f0356072faa in __interceptor_strdup /usr/src/debug/gcc/libsanitizer/asan/asan_interceptors.cpp:439 #1 0x563b7cf7317c in xstrdup wrapper.c:39 #2 0x563b7cd9a06a in save_user_format pretty.c:40 #3 0x563b7cd9b3e5 in get_commit_format pretty.c:173 #4 0x563b7ce54ea0 in handle_revision_opt revision.c:2456 #5 0x563b7ce597c9 in setup_revisions revision.c:2850 #6 0x563b7c9269e0 in cmd_log_init_finish builtin/log.c:269 #7 0x563b7c927362 in cmd_log_init builtin/log.c:348 #8 0x563b7c92b193 in cmd_log builtin/log.c:882 #9 0x563b7c802993 in run_builtin git.c:466 #10 0x563b7c803397 in handle_builtin git.c:721 #11 0x563b7c803b07 in run_argv git.c:788 #12 0x563b7c8048a7 in cmd_main git.c:923 #13 0x563b7ca99682 in main common-main.c:57 #14 0x7f0355e3c28f (/usr/lib/libc.so.6+0x2328f) #15 0x7f0355e3c349 in __libc_start_main (/usr/lib/libc.so.6+0x23349) #16 0x563b7c7fe0e4 in _start ../sysdeps/x86_64/start.S:115 SUMMARY: AddressSanitizer: heap-buffer-overflow /usr/src/debug/gcc/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:725 in __interceptor_strchrnul Shadow bytes around the buggy address: 0x0c047fff8020: fa fa fd fd fa fa 00 06 fa fa 05 fa fa fa fd fd 0x0c047fff8030: fa fa 00 02 fa fa 06 fa fa fa 05 fa fa fa fd fd 0x0c047fff8040: fa fa 00 07 fa fa 03 fa fa fa fd fd fa fa 00 00 0x0c047fff8050: fa fa 00 01 fa fa fd fd fa fa 00 00 fa fa 00 01 0x0c047fff8060: fa fa 00 06 fa fa 00 06 fa fa 05 fa fa fa 05 fa =>0x0c047fff8070: fa fa 00[fa]fa fa fd fa fa fa fd fd fa fa fd fd 0x0c047fff8080: fa fa fd fd fa fa 00 00 fa fa 00 fa fa fa fd fa 0x0c047fff8090: fa fa fd fd fa fa 00 00 fa fa fa fa fa fa fa fa 0x0c047fff80a0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff80b0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff80c0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 Container overflow: fc Array cookie: ac Intra object redzone: bb ASan internal: fe Left alloca redzone: ca Right alloca redzone: cb ==10888==ABORTING Fix this bug by checking whether `end` points at the trailing NUL byte. Add a test which catches this out-of-bounds read and which demonstrates that we used to write out-of-bounds data into the formatted message. Reported-by: Markus Vervier Original-patch-by: Markus Vervier Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- pretty.c | 2 +- t/t4205-log-pretty-formats.sh | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pretty.c b/pretty.c index 692a6382a11..d55b88607af 100644 --- a/pretty.c +++ b/pretty.c @@ -1041,7 +1041,7 @@ static size_t parse_padding_placeholder(const char *placeholder, const char *end = start + strcspn(start, ",)"); char *next; int width; - if (!end || end == start) + if (!*end || end == start) return 0; width = strtol(start, &next, 10); if (next == start || width == 0) diff --git a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh index 126dc20f23c..cdde37d3257 100755 --- a/t/t4205-log-pretty-formats.sh +++ b/t/t4205-log-pretty-formats.sh @@ -873,6 +873,12 @@ test_expect_success 'log --pretty with space stealing' ' test_cmp expect actual ' +test_expect_success 'log --pretty with invalid padding format' ' + printf "%s%%<(20" "$(git rev-parse HEAD)" >expect && + git log -1 --pretty="format:%H%<(20" >actual && + test_cmp expect actual +' + test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' ' # We only assert that this command does not crash. This needs to be # executed with the address sanitizer to demonstrate failure. From 1de69c0cdd388b0a5b7bdde0bfa0bda514a354b0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:46:39 +0100 Subject: [PATCH 15/30] pretty: fix adding linefeed when placeholder is not expanded When a formatting directive has a `+` or ` ` after the `%`, then we add either a line feed or space if the placeholder expands to a non-empty string. In specific cases though this logic doesn't work as expected, and we try to add the character even in the case where the formatting directive is empty. One such pattern is `%w(1)%+d%+w(2)`. `%+d` expands to reference names pointing to a certain commit, like in `git log --decorate`. For a tagged commit this would for example expand to `\n (tag: v1.0.0)`, which has a leading newline due to the `+` modifier and a space added by `%d`. Now the second wrapping directive will cause us to rewrap the text to `\n(tag:\nv1.0.0)`, which is one byte shorter due to the missing leading space. The code that handles the `+` magic now notices that the length has changed and will thus try to insert a leading line feed at the original posititon. But as the string was shortened, the original position is past the buffer's boundary and thus we die with an error. Now there are two issues here: 1. We check whether the buffer length has changed, not whether it has been extended. This causes us to try and add the character past the string boundary. 2. The current logic does not make any sense whatsoever. When the string got expanded due to the rewrap, putting the separator into the original position is likely to put it somewhere into the middle of the rewrapped contents. It is debatable whether `%+w()` makes any sense in the first place. Strictly speaking, the placeholder never expands to a non-empty string, and consequentially we shouldn't ever accept this combination. We thus fix the bug by simply refusing `%+w()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- pretty.c | 14 +++++++++++++- t/t4205-log-pretty-formats.sh | 8 ++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/pretty.c b/pretty.c index d55b88607af..c6c757c0cea 100644 --- a/pretty.c +++ b/pretty.c @@ -1597,9 +1597,21 @@ static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */ default: break; } - if (magic != NO_MAGIC) + if (magic != NO_MAGIC) { placeholder++; + switch (placeholder[0]) { + case 'w': + /* + * `%+w()` cannot ever expand to a non-empty string, + * and it potentially changes the layout of preceding + * contents. We're thus not able to handle the magic in + * this combination and refuse the pattern. + */ + return 0; + }; + } + orig_len = sb->len; if (((struct format_commit_context *)context)->flush_type != no_flush) consumed = format_and_pad_commit(sb, placeholder, context); diff --git a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh index cdde37d3257..1d768f72446 100755 --- a/t/t4205-log-pretty-formats.sh +++ b/t/t4205-log-pretty-formats.sh @@ -879,6 +879,14 @@ test_expect_success 'log --pretty with invalid padding format' ' test_cmp expect actual ' +test_expect_success 'log --pretty with magical wrapping directives' ' + commit_id=$(git commit-tree HEAD^{tree} -m "describe me") && + git tag describe-me $commit_id && + printf "\n(tag:\ndescribe-me)%%+w(2)" >expect && + git log -1 --pretty="format:%w(1)%+d%+w(2)" $commit_id >actual && + test_cmp expect actual +' + test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' ' # We only assert that this command does not crash. This needs to be # executed with the address sanitizer to demonstrate failure. From 48050c42c73c28b0c001d63d11dffac7e116847b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:46:49 +0100 Subject: [PATCH 16/30] pretty: fix integer overflow in wrapping format The `%w(width,indent1,indent2)` formatting directive can be used to rewrap text to a specific width and is designed after git-shortlog(1)'s `-w` parameter. While the three parameters are all stored as `size_t` internally, `strbuf_add_wrapped_text()` accepts integers as input. As a result, the casted integers may overflow. As these now-negative integers are later on passed to `strbuf_addchars()`, we will ultimately run into implementation-defined behaviour due to casting a negative number back to `size_t` again. On my platform, this results in trying to allocate 9000 petabyte of memory. Fix this overflow by using `cast_size_t_to_int()` so that we reject inputs that cannot be represented as an integer. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- git-compat-util.h | 8 ++++++++ pretty.c | 4 +++- t/t4205-log-pretty-formats.sh | 12 ++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/git-compat-util.h b/git-compat-util.h index f505f817d5f..0ac1b7f560c 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -918,6 +918,14 @@ static inline size_t st_sub(size_t a, size_t b) return a - b; } +static inline int cast_size_t_to_int(size_t a) +{ + if (a > INT_MAX) + die("number too large to represent as int on this platform: %"PRIuMAX, + (uintmax_t)a); + return (int)a; +} + #ifdef HAVE_ALLOCA_H # include # define xalloca(size) (alloca(size)) diff --git a/pretty.c b/pretty.c index c6c757c0cea..7e649b1cec7 100644 --- a/pretty.c +++ b/pretty.c @@ -915,7 +915,9 @@ static void strbuf_wrap(struct strbuf *sb, size_t pos, if (pos) strbuf_add(&tmp, sb->buf, pos); strbuf_add_wrapped_text(&tmp, sb->buf + pos, - (int) indent1, (int) indent2, (int) width); + cast_size_t_to_int(indent1), + cast_size_t_to_int(indent2), + cast_size_t_to_int(width)); strbuf_swap(&tmp, sb); strbuf_release(&tmp); } diff --git a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh index 1d768f72446..c88b64d08b6 100755 --- a/t/t4205-log-pretty-formats.sh +++ b/t/t4205-log-pretty-formats.sh @@ -887,6 +887,18 @@ test_expect_success 'log --pretty with magical wrapping directives' ' test_cmp expect actual ' +test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing wrapping directive' ' + cat >expect <<-EOF && + fatal: number too large to represent as int on this platform: 2147483649 + EOF + test_must_fail git log -1 --pretty="format:%w(2147483649,1,1)%d" 2>error && + test_cmp expect error && + test_must_fail git log -1 --pretty="format:%w(1,2147483649,1)%d" 2>error && + test_cmp expect error && + test_must_fail git log -1 --pretty="format:%w(1,1,2147483649)%d" 2>error && + test_cmp expect error +' + test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' ' # We only assert that this command does not crash. This needs to be # executed with the address sanitizer to demonstrate failure. From 522cc87fdc25449222a5894a428eebf4b8d5eaa9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:46:53 +0100 Subject: [PATCH 17/30] utf8: fix truncated string lengths in `utf8_strnwidth()` The `utf8_strnwidth()` function accepts an optional string length as input parameter. This parameter can either be set to `-1`, in which case we call `strlen()` on the input. Or it can be set to a positive integer that indicates a precomputed length, which callers typically compute by calling `strlen()` at some point themselves. The input parameter is an `int` though, whereas `strlen()` returns a `size_t`. This can lead to implementation-defined behaviour though when the `size_t` cannot be represented by the `int`. In the general case though this leads to wrap-around and thus to negative string sizes, which is sure enough to not lead to well-defined behaviour. Fix this by accepting a `size_t` instead of an `int` as string length. While this takes away the ability of callers to simply pass in `-1` as string length, it really is trivial enough to convert them to instead pass in `strlen()` instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- column.c | 2 +- pretty.c | 4 ++-- utf8.c | 8 +++----- utf8.h | 2 +- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/column.c b/column.c index 1261e18a72e..fbf88639aae 100644 --- a/column.c +++ b/column.c @@ -23,7 +23,7 @@ struct column_data { /* return length of 's' in letters, ANSI escapes stripped */ static int item_length(const char *s) { - return utf8_strnwidth(s, -1, 1); + return utf8_strnwidth(s, strlen(s), 1); } /* diff --git a/pretty.c b/pretty.c index 7e649b1cec7..aae6e792bc4 100644 --- a/pretty.c +++ b/pretty.c @@ -1483,7 +1483,7 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */ int occupied; if (!start) start = sb->buf; - occupied = utf8_strnwidth(start, -1, 1); + occupied = utf8_strnwidth(start, strlen(start), 1); occupied += c->pretty_ctx->graph_width; padding = (-padding) - occupied; } @@ -1501,7 +1501,7 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */ placeholder++; total_consumed++; } - len = utf8_strnwidth(local_sb.buf, -1, 1); + len = utf8_strnwidth(local_sb.buf, local_sb.len, 1); if (c->flush_type == flush_left_and_steal) { const char *ch = sb->buf + sb->len - 1; diff --git a/utf8.c b/utf8.c index 5b39361ada0..504e517c341 100644 --- a/utf8.c +++ b/utf8.c @@ -206,13 +206,11 @@ int utf8_width(const char **start, size_t *remainder_p) * string, assuming that the string is utf8. Returns strlen() instead * if the string does not look like a valid utf8 string. */ -int utf8_strnwidth(const char *string, int len, int skip_ansi) +int utf8_strnwidth(const char *string, size_t len, int skip_ansi) { int width = 0; const char *orig = string; - if (len == -1) - len = strlen(string); while (string && string < orig + len) { int skip; while (skip_ansi && @@ -225,7 +223,7 @@ int utf8_strnwidth(const char *string, int len, int skip_ansi) int utf8_strwidth(const char *string) { - return utf8_strnwidth(string, -1, 0); + return utf8_strnwidth(string, strlen(string), 0); } int is_utf8(const char *text) @@ -791,7 +789,7 @@ int skip_utf8_bom(char **text, size_t len) void strbuf_utf8_align(struct strbuf *buf, align_type position, unsigned int width, const char *s) { - int slen = strlen(s); + size_t slen = strlen(s); int display_len = utf8_strnwidth(s, slen, 0); int utf8_compensation = slen - display_len; diff --git a/utf8.h b/utf8.h index fcd5167bafb..6da1b6d05e2 100644 --- a/utf8.h +++ b/utf8.h @@ -7,7 +7,7 @@ typedef unsigned int ucs_char_t; /* assuming 32bit int */ size_t display_mode_esc_sequence_len(const char *s); int utf8_width(const char **start, size_t *remainder_p); -int utf8_strnwidth(const char *string, int len, int skip_ansi); +int utf8_strnwidth(const char *string, size_t len, int skip_ansi); int utf8_strwidth(const char *string); int is_utf8(const char *text); int is_encoding_utf8(const char *name); From 17d23e8a3812a5ca3dd6564e74d5250f22e5d76d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:47:00 +0100 Subject: [PATCH 18/30] utf8: fix returning negative string width The `utf8_strnwidth()` function calls `utf8_width()` in a loop and adds its returned width to the end result. `utf8_width()` can return `-1` though in case it reads a control character, which means that the computed string width is going to be wrong. In the worst case where there are more control characters than non-control characters, we may even return a negative string width. Fix this bug by treating control characters as having zero width. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t4205-log-pretty-formats.sh | 6 ++++++ utf8.c | 8 ++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh index c88b64d08b6..e3905baa3c4 100755 --- a/t/t4205-log-pretty-formats.sh +++ b/t/t4205-log-pretty-formats.sh @@ -899,6 +899,12 @@ test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing wrapping dire test_cmp expect error ' +test_expect_success 'log --pretty with padding and preceding control chars' ' + printf "\20\20 0" >expect && + git log -1 --pretty="format:%x10%x10%>|(4)%x30" >actual && + test_cmp expect actual +' + test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' ' # We only assert that this command does not crash. This needs to be # executed with the address sanitizer to demonstrate failure. diff --git a/utf8.c b/utf8.c index 504e517c341..6a21fd6a7b0 100644 --- a/utf8.c +++ b/utf8.c @@ -212,11 +212,15 @@ int utf8_strnwidth(const char *string, size_t len, int skip_ansi) const char *orig = string; while (string && string < orig + len) { - int skip; + int glyph_width, skip; + while (skip_ansi && (skip = display_mode_esc_sequence_len(string)) != 0) string += skip; - width += utf8_width(&string, NULL); + + glyph_width = utf8_width(&string, NULL); + if (glyph_width > 0) + width += glyph_width; } return string ? width : len; } From 937b71cc8b5b998963a7f9a33312ba3549d55510 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:47:04 +0100 Subject: [PATCH 19/30] utf8: fix overflow when returning string width The return type of both `utf8_strwidth()` and `utf8_strnwidth()` is `int`, but we operate on string lengths which are typically of type `size_t`. This means that when the string is longer than `INT_MAX`, we will overflow and thus return a negative result. This can lead to an out-of-bounds write with `--pretty=format:%<1)%B` and a commit message that is 2^31+1 bytes long: ================================================================= ==26009==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x603000001168 at pc 0x7f95c4e5f427 bp 0x7ffd8541c900 sp 0x7ffd8541c0a8 WRITE of size 2147483649 at 0x603000001168 thread T0 #0 0x7f95c4e5f426 in __interceptor_memcpy /usr/src/debug/gcc/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:827 #1 0x5612bbb1068c in format_and_pad_commit pretty.c:1763 #2 0x5612bbb1087a in format_commit_item pretty.c:1801 #3 0x5612bbc33bab in strbuf_expand strbuf.c:429 #4 0x5612bbb110e7 in repo_format_commit_message pretty.c:1869 #5 0x5612bbb12d96 in pretty_print_commit pretty.c:2161 #6 0x5612bba0a4d5 in show_log log-tree.c:781 #7 0x5612bba0d6c7 in log_tree_commit log-tree.c:1117 #8 0x5612bb691ed5 in cmd_log_walk_no_free builtin/log.c:508 #9 0x5612bb69235b in cmd_log_walk builtin/log.c:549 #10 0x5612bb6951a2 in cmd_log builtin/log.c:883 #11 0x5612bb56c993 in run_builtin git.c:466 #12 0x5612bb56d397 in handle_builtin git.c:721 #13 0x5612bb56db07 in run_argv git.c:788 #14 0x5612bb56e8a7 in cmd_main git.c:923 #15 0x5612bb803682 in main common-main.c:57 #16 0x7f95c4c3c28f (/usr/lib/libc.so.6+0x2328f) #17 0x7f95c4c3c349 in __libc_start_main (/usr/lib/libc.so.6+0x23349) #18 0x5612bb5680e4 in _start ../sysdeps/x86_64/start.S:115 0x603000001168 is located 0 bytes to the right of 24-byte region [0x603000001150,0x603000001168) allocated by thread T0 here: #0 0x7f95c4ebe7ea in __interceptor_realloc /usr/src/debug/gcc/libsanitizer/asan/asan_malloc_linux.cpp:85 #1 0x5612bbcdd556 in xrealloc wrapper.c:136 #2 0x5612bbc310a3 in strbuf_grow strbuf.c:99 #3 0x5612bbc32acd in strbuf_add strbuf.c:298 #4 0x5612bbc33aec in strbuf_expand strbuf.c:418 #5 0x5612bbb110e7 in repo_format_commit_message pretty.c:1869 #6 0x5612bbb12d96 in pretty_print_commit pretty.c:2161 #7 0x5612bba0a4d5 in show_log log-tree.c:781 #8 0x5612bba0d6c7 in log_tree_commit log-tree.c:1117 #9 0x5612bb691ed5 in cmd_log_walk_no_free builtin/log.c:508 #10 0x5612bb69235b in cmd_log_walk builtin/log.c:549 #11 0x5612bb6951a2 in cmd_log builtin/log.c:883 #12 0x5612bb56c993 in run_builtin git.c:466 #13 0x5612bb56d397 in handle_builtin git.c:721 #14 0x5612bb56db07 in run_argv git.c:788 #15 0x5612bb56e8a7 in cmd_main git.c:923 #16 0x5612bb803682 in main common-main.c:57 #17 0x7f95c4c3c28f (/usr/lib/libc.so.6+0x2328f) SUMMARY: AddressSanitizer: heap-buffer-overflow /usr/src/debug/gcc/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:827 in __interceptor_memcpy Shadow bytes around the buggy address: 0x0c067fff81d0: fd fd fd fa fa fa fd fd fd fa fa fa fd fd fd fa 0x0c067fff81e0: fa fa fd fd fd fd fa fa fd fd fd fd fa fa fd fd 0x0c067fff81f0: fd fa fa fa fd fd fd fa fa fa fd fd fd fa fa fa 0x0c067fff8200: fd fd fd fa fa fa fd fd fd fd fa fa 00 00 00 fa 0x0c067fff8210: fa fa fd fd fd fa fa fa fd fd fd fa fa fa fd fd =>0x0c067fff8220: fd fa fa fa fd fd fd fa fa fa 00 00 00[fa]fa fa 0x0c067fff8230: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c067fff8240: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c067fff8250: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c067fff8260: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c067fff8270: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 Container overflow: fc Array cookie: ac Intra object redzone: bb ASan internal: fe Left alloca redzone: ca Right alloca redzone: cb ==26009==ABORTING Now the proper fix for this would be to convert both functions to return an `size_t` instead of an `int`. But given that this commit may be part of a security release, let's instead do the minimal viable fix and die in case we see an overflow. Add a test that would have previously caused us to crash. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t4205-log-pretty-formats.sh | 8 ++++++++ utf8.c | 12 +++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh index e3905baa3c4..aac9e4ce6cd 100755 --- a/t/t4205-log-pretty-formats.sh +++ b/t/t4205-log-pretty-formats.sh @@ -922,4 +922,12 @@ test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit mes test_cmp expect actual ' +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message does not cause allocation failure' ' + test_must_fail git log -1 --format="%<(1)%B" $huge_commit 2>error && + cat >expect <<-EOF && + fatal: number too large to represent as int on this platform: 2147483649 + EOF + test_cmp expect error +' + test_done diff --git a/utf8.c b/utf8.c index 6a21fd6a7b0..30c7787cfa9 100644 --- a/utf8.c +++ b/utf8.c @@ -208,11 +208,12 @@ int utf8_width(const char **start, size_t *remainder_p) */ int utf8_strnwidth(const char *string, size_t len, int skip_ansi) { - int width = 0; const char *orig = string; + size_t width = 0; while (string && string < orig + len) { - int glyph_width, skip; + int glyph_width; + size_t skip; while (skip_ansi && (skip = display_mode_esc_sequence_len(string)) != 0) @@ -222,7 +223,12 @@ int utf8_strnwidth(const char *string, size_t len, int skip_ansi) if (glyph_width > 0) width += glyph_width; } - return string ? width : len; + + /* + * TODO: fix the interface of this function and `utf8_strwidth()` to + * return `size_t` instead of `int`. + */ + return cast_size_t_to_int(string ? width : len); } int utf8_strwidth(const char *string) From 81c2d4c3a5ba0e6ab8c348708441fed170e63a82 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:47:10 +0100 Subject: [PATCH 20/30] utf8: fix checking for glyph width in `strbuf_utf8_replace()` In `strbuf_utf8_replace()`, we call `utf8_width()` to compute the width of the current glyph. If the glyph is a control character though it can be that `utf8_width()` returns `-1`, but because we assign this value to a `size_t` the conversion will cause us to underflow. This bug can easily be triggered with the following command: $ git log --pretty='format:xxx%<|(1,trunc)%x10' >From all I can see though this seems to be a benign underflow that has no security-related consequences. Fix the bug by using an `int` instead. When we see a control character, we now copy it into the target buffer but don't advance the current width of the string. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t4205-log-pretty-formats.sh | 7 +++++++ utf8.c | 19 ++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh index aac9e4ce6cd..5c5b56596e8 100755 --- a/t/t4205-log-pretty-formats.sh +++ b/t/t4205-log-pretty-formats.sh @@ -905,6 +905,13 @@ test_expect_success 'log --pretty with padding and preceding control chars' ' test_cmp expect actual ' +test_expect_success 'log --pretty truncation with control chars' ' + test_commit "$(printf "\20\20\20\20xxxx")" file contents commit-with-control-chars && + printf "\20\20\20\20x.." >expect && + git log -1 --pretty="format:%<(3,trunc)%s" commit-with-control-chars >actual && + test_cmp expect actual +' + test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' ' # We only assert that this command does not crash. This needs to be # executed with the address sanitizer to demonstrate failure. diff --git a/utf8.c b/utf8.c index 30c7787cfa9..077daf4b20a 100644 --- a/utf8.c +++ b/utf8.c @@ -377,6 +377,7 @@ void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width, dst = sb_dst.buf; while (src < end) { + int glyph_width; char *old; size_t n; @@ -390,21 +391,29 @@ void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width, break; old = src; - n = utf8_width((const char**)&src, NULL); - if (!src) /* broken utf-8, do nothing */ + glyph_width = utf8_width((const char**)&src, NULL); + if (!src) /* broken utf-8, do nothing */ goto out; - if (n && w >= pos && w < pos + width) { + + /* + * In case we see a control character we copy it into the + * buffer, but don't add it to the width. + */ + if (glyph_width < 0) + glyph_width = 0; + + if (glyph_width && w >= pos && w < pos + width) { if (subst) { memcpy(dst, subst, subst_len); dst += subst_len; subst = NULL; } - w += n; + w += glyph_width; continue; } memcpy(dst, old, src - old); dst += src - old; - w += n; + w += glyph_width; } strbuf_setlen(&sb_dst, dst - sb_dst.buf); strbuf_swap(sb_src, &sb_dst); From f930a2394303b902e2973f4308f96529f736b8bc Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:47:15 +0100 Subject: [PATCH 21/30] utf8: refactor `strbuf_utf8_replace` to not rely on preallocated buffer In `strbuf_utf8_replace`, we preallocate the destination buffer and then use `memcpy` to copy bytes into it at computed offsets. This feels rather fragile and is hard to understand at times. Refactor the code to instead use `strbuf_add` and `strbuf_addstr` so that we can be sure that there is no possibility to perform an out-of-bounds write. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- utf8.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/utf8.c b/utf8.c index 077daf4b20a..d8a16af87c9 100644 --- a/utf8.c +++ b/utf8.c @@ -365,26 +365,20 @@ void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len, void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width, const char *subst) { - struct strbuf sb_dst = STRBUF_INIT; - char *src = sb_src->buf; - char *end = src + sb_src->len; - char *dst; - int w = 0, subst_len = 0; + const char *src = sb_src->buf, *end = sb_src->buf + sb_src->len; + struct strbuf dst; + int w = 0; - if (subst) - subst_len = strlen(subst); - strbuf_grow(&sb_dst, sb_src->len + subst_len); - dst = sb_dst.buf; + strbuf_init(&dst, sb_src->len); while (src < end) { + const char *old; int glyph_width; - char *old; size_t n; while ((n = display_mode_esc_sequence_len(src))) { - memcpy(dst, src, n); + strbuf_add(&dst, src, n); src += n; - dst += n; } if (src >= end) @@ -404,21 +398,19 @@ void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width, if (glyph_width && w >= pos && w < pos + width) { if (subst) { - memcpy(dst, subst, subst_len); - dst += subst_len; + strbuf_addstr(&dst, subst); subst = NULL; } - w += glyph_width; - continue; + } else { + strbuf_add(&dst, old, src - old); } - memcpy(dst, old, src - old); - dst += src - old; + w += glyph_width; } - strbuf_setlen(&sb_dst, dst - sb_dst.buf); - strbuf_swap(sb_src, &sb_dst); + + strbuf_swap(sb_src, &dst); out: - strbuf_release(&sb_dst); + strbuf_release(&dst); } /* From 304a50adff6480ede46b68f7545baab542cbfb46 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:47:23 +0100 Subject: [PATCH 22/30] pretty: restrict input lengths for padding and wrapping formats Both the padding and wrapping formatting directives allow the caller to specify an integer that ultimately leads to us adding this many chars to the result buffer. As a consequence, it is trivial to e.g. allocate 2GB of RAM via a single formatting directive and cause resource exhaustion on the machine executing this logic. Furthermore, it is debatable whether there are any sane usecases that require the user to pad data to 2GB boundaries or to indent wrapped data by 2GB. Restrict the input sizes to 16 kilobytes at a maximum to limit the amount of bytes that can be requested by the user. This is not meant as a fix because there are ways to trivially amplify the amount of data we generate via formatting directives; the real protection is achieved by the changes in previous steps to catch and avoid integer wraparound that causes us to under-allocate and access beyond the end of allocated memory reagions. But having such a limit significantly helps fuzzing the pretty format, because the fuzzer is otherwise quite fast to run out-of-memory as it discovers these formatters. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- pretty.c | 26 ++++++++++++++++++++++++++ t/t4205-log-pretty-formats.sh | 24 +++++++++++++++--------- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/pretty.c b/pretty.c index aae6e792bc4..e2285572c49 100644 --- a/pretty.c +++ b/pretty.c @@ -13,6 +13,13 @@ #include "gpg-interface.h" #include "trailer.h" +/* + * The limit for formatting directives, which enable the caller to append + * arbitrarily many bytes to the formatted buffer. This includes padding + * and wrapping formatters. + */ +#define FORMATTING_LIMIT (16 * 1024) + static char *user_format; static struct cmt_fmt_map { const char *name; @@ -1046,6 +1053,15 @@ static size_t parse_padding_placeholder(const char *placeholder, if (!*end || end == start) return 0; width = strtol(start, &next, 10); + + /* + * We need to limit the amount of padding, or otherwise this + * would allow the user to pad the buffer by arbitrarily many + * bytes and thus cause resource exhaustion. + */ + if (width < -FORMATTING_LIMIT || width > FORMATTING_LIMIT) + return 0; + if (next == start || width == 0) return 0; if (width < 0) { @@ -1205,6 +1221,16 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */ if (*next != ')') return 0; } + + /* + * We need to limit the format here as it allows the + * user to prepend arbitrarily many bytes to the buffer + * when rewrapping. + */ + if (width > FORMATTING_LIMIT || + indent1 > FORMATTING_LIMIT || + indent2 > FORMATTING_LIMIT) + return 0; rewrap_message_tail(sb, c, width, indent1, indent2); return end - placeholder + 1; } else diff --git a/t/t4205-log-pretty-formats.sh b/t/t4205-log-pretty-formats.sh index 5c5b56596e8..84c61dfc489 100755 --- a/t/t4205-log-pretty-formats.sh +++ b/t/t4205-log-pretty-formats.sh @@ -888,15 +888,21 @@ test_expect_success 'log --pretty with magical wrapping directives' ' ' test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing wrapping directive' ' - cat >expect <<-EOF && - fatal: number too large to represent as int on this platform: 2147483649 - EOF - test_must_fail git log -1 --pretty="format:%w(2147483649,1,1)%d" 2>error && - test_cmp expect error && - test_must_fail git log -1 --pretty="format:%w(1,2147483649,1)%d" 2>error && - test_cmp expect error && - test_must_fail git log -1 --pretty="format:%w(1,1,2147483649)%d" 2>error && - test_cmp expect error + printf "%%w(2147483649,1,1)0" >expect && + git log -1 --pretty="format:%w(2147483649,1,1)%x30" >actual && + test_cmp expect actual && + printf "%%w(1,2147483649,1)0" >expect && + git log -1 --pretty="format:%w(1,2147483649,1)%x30" >actual && + test_cmp expect actual && + printf "%%w(1,1,2147483649)0" >expect && + git log -1 --pretty="format:%w(1,1,2147483649)%x30" >actual && + test_cmp expect actual +' + +test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing padding directive' ' + printf "%%<(2147483649)0" >expect && + git log -1 --pretty="format:%<(2147483649)%x30" >actual && + test_cmp expect actual ' test_expect_success 'log --pretty with padding and preceding control chars' ' From bb3a9265e505e9593faa260860f9b8929af0963e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:45:57 +0100 Subject: [PATCH 23/30] fsck: refactor `fsck_blob()` to allow for more checks In general, we don't need to validate blob contents as they are opaque blobs about whose content Git doesn't need to care about. There are some exceptions though when blobs are linked into trees so that they would be interpreted by Git. We only have a single such check right now though, which is the one for gitmodules that has been added in the context of CVE-2018-11235. Now we have found another vulnerability with gitattributes that can lead to out-of-bounds writes and reads. So let's refactor `fsck_blob()` so that it is more extensible and can check different types of blobs. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- fsck.c | 55 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/fsck.c b/fsck.c index 3ec500d707a..ddcb2d264cd 100644 --- a/fsck.c +++ b/fsck.c @@ -1170,38 +1170,41 @@ static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata) static int fsck_blob(const struct object_id *oid, const char *buf, unsigned long size, struct fsck_options *options) { - struct fsck_gitmodules_data data; - struct config_options config_opts = { 0 }; - - if (!oidset_contains(&options->gitmodules_found, oid)) - return 0; - oidset_insert(&options->gitmodules_done, oid); + int ret = 0; if (object_on_skiplist(options, oid)) return 0; - if (!buf) { - /* - * A missing buffer here is a sign that the caller found the - * blob too gigantic to load into memory. Let's just consider - * that an error. - */ - return report(options, oid, OBJ_BLOB, - FSCK_MSG_GITMODULES_LARGE, - ".gitmodules too large to parse"); + if (oidset_contains(&options->gitmodules_found, oid)) { + struct config_options config_opts = { 0 }; + struct fsck_gitmodules_data data; + + oidset_insert(&options->gitmodules_done, oid); + + if (!buf) { + /* + * A missing buffer here is a sign that the caller found the + * blob too gigantic to load into memory. Let's just consider + * that an error. + */ + return report(options, oid, OBJ_BLOB, + FSCK_MSG_GITMODULES_LARGE, + ".gitmodules too large to parse"); + } + + data.oid = oid; + data.options = options; + data.ret = 0; + config_opts.error_action = CONFIG_ERROR_SILENT; + if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB, + ".gitmodules", buf, size, &data, &config_opts)) + data.ret |= report(options, oid, OBJ_BLOB, + FSCK_MSG_GITMODULES_PARSE, + "could not parse gitmodules blob"); + ret |= data.ret; } - data.oid = oid; - data.options = options; - data.ret = 0; - config_opts.error_action = CONFIG_ERROR_SILENT; - if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB, - ".gitmodules", buf, size, &data, &config_opts)) - data.ret |= report(options, oid, OBJ_BLOB, - FSCK_MSG_GITMODULES_PARSE, - "could not parse gitmodules blob"); - - return data.ret; + return ret; } int fsck_object(struct object *obj, void *data, unsigned long size, From a59a8c687f18db2b4c54a9d0795f93c4df1f9703 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:46:01 +0100 Subject: [PATCH 24/30] fsck: pull out function to check a set of blobs In `fsck_finish()` we check all blobs for consistency that we have found during the tree walk, but that haven't yet been checked. This is only required for gitmodules right now, but will also be required for a new check for gitattributes. Pull out a function `fsck_blobs()` that allows the caller to check a set of blobs for consistency. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- fsck.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/fsck.c b/fsck.c index ddcb2d264cd..4762ca9478a 100644 --- a/fsck.c +++ b/fsck.c @@ -1243,19 +1243,21 @@ int fsck_error_function(struct fsck_options *o, return 1; } -int fsck_finish(struct fsck_options *options) +static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done, + enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type, + struct fsck_options *options, const char *blob_type) { int ret = 0; struct oidset_iter iter; const struct object_id *oid; - oidset_iter_init(&options->gitmodules_found, &iter); + oidset_iter_init(blobs_found, &iter); while ((oid = oidset_iter_next(&iter))) { enum object_type type; unsigned long size; char *buf; - if (oidset_contains(&options->gitmodules_done, oid)) + if (oidset_contains(blobs_done, oid)) continue; buf = read_object_file(oid, &type, &size); @@ -1263,25 +1265,33 @@ int fsck_finish(struct fsck_options *options) if (is_promisor_object(oid)) continue; ret |= report(options, - oid, OBJ_BLOB, - FSCK_MSG_GITMODULES_MISSING, - "unable to read .gitmodules blob"); + oid, OBJ_BLOB, msg_missing, + "unable to read %s blob", blob_type); continue; } if (type == OBJ_BLOB) ret |= fsck_blob(oid, buf, size, options); else - ret |= report(options, - oid, type, - FSCK_MSG_GITMODULES_BLOB, - "non-blob found at .gitmodules"); + ret |= report(options, oid, type, msg_type, + "non-blob found at %s", blob_type); free(buf); } + oidset_clear(blobs_found); + oidset_clear(blobs_done); + + return ret; +} + +int fsck_finish(struct fsck_options *options) +{ + int ret = 0; + + ret |= fsck_blobs(&options->gitmodules_found, &options->gitmodules_done, + FSCK_MSG_GITMODULES_MISSING, FSCK_MSG_GITMODULES_BLOB, + options, ".gitmodules"); - oidset_clear(&options->gitmodules_found); - oidset_clear(&options->gitmodules_done); return ret; } From f8587c31c96172aac547f83977c98fa8f0e2aa67 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:46:05 +0100 Subject: [PATCH 25/30] fsck: move checks for gitattributes Move the checks for gitattributes so that they can be extended more readily. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- fsck.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fsck.c b/fsck.c index 4762ca9478a..3a7fb9ebbaf 100644 --- a/fsck.c +++ b/fsck.c @@ -614,17 +614,19 @@ static int fsck_tree(const struct object_id *tree_oid, ".gitmodules is a symbolic link"); } + if (is_hfs_dotgitattributes(name) || is_ntfs_dotgitattributes(name)) { + if (S_ISLNK(mode)) + retval += report(options, tree_oid, OBJ_TREE, + FSCK_MSG_GITATTRIBUTES_SYMLINK, + ".gitattributes is a symlink"); + } + if (S_ISLNK(mode)) { if (is_hfs_dotgitignore(name) || is_ntfs_dotgitignore(name)) retval += report(options, tree_oid, OBJ_TREE, FSCK_MSG_GITIGNORE_SYMLINK, ".gitignore is a symlink"); - if (is_hfs_dotgitattributes(name) || - is_ntfs_dotgitattributes(name)) - retval += report(options, tree_oid, OBJ_TREE, - FSCK_MSG_GITATTRIBUTES_SYMLINK, - ".gitattributes is a symlink"); if (is_hfs_dotmailmap(name) || is_ntfs_dotmailmap(name)) retval += report(options, tree_oid, OBJ_TREE, From 27ab4784d5c9e24345b9f5b443609cbe527c51f9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 1 Dec 2022 15:46:09 +0100 Subject: [PATCH 26/30] fsck: implement checks for gitattributes Recently, a vulnerability was reported that can lead to an out-of-bounds write when reading an unreasonably large gitattributes file. The root cause of this error are multiple integer overflows in different parts of the code when there are either too many lines, when paths are too long, when attribute names are too long, or when there are too many attributes declared for a pattern. As all of these are related to size, it seems reasonable to restrict the size of the gitattributes file via git-fsck(1). This allows us to both stop distributing known-vulnerable objects via common hosting platforms that have fsck enabled, and users to protect themselves by enabling the `fetch.fsckObjects` config. There are basically two checks: 1. We verify that size of the gitattributes file is smaller than 100MB. 2. We verify that the maximum line length does not exceed 2048 bytes. With the preceding commits, both of these conditions would cause us to either ignore the complete gitattributes file or blob in the first case, or the specific line in the second case. Now with these consistency checks added, we also grow the ability to stop distributing such files in the first place when `receive.fsckObjects` is enabled. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- fsck.c | 38 +++++++++++++++++++++++++++++++++++++- fsck.h | 12 ++++++++++++ t/t1450-fsck.sh | 24 ++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/fsck.c b/fsck.c index 3a7fb9ebbaf..614c7764294 100644 --- a/fsck.c +++ b/fsck.c @@ -2,6 +2,7 @@ #include "object-store.h" #include "repository.h" #include "object.h" +#include "attr.h" #include "blob.h" #include "tree.h" #include "tree-walk.h" @@ -615,7 +616,10 @@ static int fsck_tree(const struct object_id *tree_oid, } if (is_hfs_dotgitattributes(name) || is_ntfs_dotgitattributes(name)) { - if (S_ISLNK(mode)) + if (!S_ISLNK(mode)) + oidset_insert(&options->gitattributes_found, + entry_oid); + else retval += report(options, tree_oid, OBJ_TREE, FSCK_MSG_GITATTRIBUTES_SYMLINK, ".gitattributes is a symlink"); @@ -1206,6 +1210,35 @@ static int fsck_blob(const struct object_id *oid, const char *buf, ret |= data.ret; } + if (oidset_contains(&options->gitattributes_found, oid)) { + const char *ptr; + + oidset_insert(&options->gitattributes_done, oid); + + if (!buf || size > ATTR_MAX_FILE_SIZE) { + /* + * A missing buffer here is a sign that the caller found the + * blob too gigantic to load into memory. Let's just consider + * that an error. + */ + return report(options, oid, OBJ_BLOB, + FSCK_MSG_GITATTRIBUTES_LARGE, + ".gitattributes too large to parse"); + } + + for (ptr = buf; *ptr; ) { + const char *eol = strchrnul(ptr, '\n'); + if (eol - ptr >= ATTR_MAX_LINE_LENGTH) { + ret |= report(options, oid, OBJ_BLOB, + FSCK_MSG_GITATTRIBUTES_LINE_LENGTH, + ".gitattributes has too long lines to parse"); + break; + } + + ptr = *eol ? eol + 1 : eol; + } + } + return ret; } @@ -1293,6 +1326,9 @@ int fsck_finish(struct fsck_options *options) ret |= fsck_blobs(&options->gitmodules_found, &options->gitmodules_done, FSCK_MSG_GITMODULES_MISSING, FSCK_MSG_GITMODULES_BLOB, options, ".gitmodules"); + ret |= fsck_blobs(&options->gitattributes_found, &options->gitattributes_done, + FSCK_MSG_GITATTRIBUTES_MISSING, FSCK_MSG_GITATTRIBUTES_BLOB, + options, ".gitattributes"); return ret; } diff --git a/fsck.h b/fsck.h index d07f7a2459e..cc3379d4e93 100644 --- a/fsck.h +++ b/fsck.h @@ -55,6 +55,10 @@ enum fsck_msg_type { FUNC(GITMODULES_URL, ERROR) \ FUNC(GITMODULES_PATH, ERROR) \ FUNC(GITMODULES_UPDATE, ERROR) \ + FUNC(GITATTRIBUTES_MISSING, ERROR) \ + FUNC(GITATTRIBUTES_LARGE, ERROR) \ + FUNC(GITATTRIBUTES_LINE_LENGTH, ERROR) \ + FUNC(GITATTRIBUTES_BLOB, ERROR) \ /* warnings */ \ FUNC(BAD_FILEMODE, WARN) \ FUNC(EMPTY_NAME, WARN) \ @@ -129,6 +133,8 @@ struct fsck_options { struct oidset skiplist; struct oidset gitmodules_found; struct oidset gitmodules_done; + struct oidset gitattributes_found; + struct oidset gitattributes_done; kh_oid_map_t *object_names; }; @@ -136,18 +142,24 @@ struct fsck_options { .skiplist = OIDSET_INIT, \ .gitmodules_found = OIDSET_INIT, \ .gitmodules_done = OIDSET_INIT, \ + .gitattributes_found = OIDSET_INIT, \ + .gitattributes_done = OIDSET_INIT, \ .error_func = fsck_error_function \ } #define FSCK_OPTIONS_STRICT { \ .strict = 1, \ .gitmodules_found = OIDSET_INIT, \ .gitmodules_done = OIDSET_INIT, \ + .gitattributes_found = OIDSET_INIT, \ + .gitattributes_done = OIDSET_INIT, \ .error_func = fsck_error_function, \ } #define FSCK_OPTIONS_MISSING_GITMODULES { \ .strict = 1, \ .gitmodules_found = OIDSET_INIT, \ .gitmodules_done = OIDSET_INIT, \ + .gitattributes_found = OIDSET_INIT, \ + .gitattributes_done = OIDSET_INIT, \ .error_func = fsck_error_cb_print_missing_gitmodules, \ } diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 5071ac63a5b..9e0afe1fbf8 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -865,4 +865,28 @@ test_expect_success 'detect corrupt index file in fsck' ' test_i18ngrep "bad index file" errors ' +test_expect_success 'fsck error on gitattributes with excessive line lengths' ' + blob=$(printf "pattern %02048d" 1 | git hash-object -w --stdin) && + test_when_finished "remove_object $blob" && + tree=$(printf "100644 blob %s\t%s\n" $blob .gitattributes | git mktree) && + test_when_finished "remove_object $tree" && + cat >expected <<-EOF && + error in blob $blob: gitattributesLineLength: .gitattributes has too long lines to parse + EOF + test_must_fail git fsck --no-dangling >actual 2>&1 && + test_cmp expected actual +' + +test_expect_success 'fsck error on gitattributes with excessive size' ' + blob=$(test-tool genzeros $((100 * 1024 * 1024 + 1)) | git hash-object -w --stdin) && + test_when_finished "remove_object $blob" && + tree=$(printf "100644 blob %s\t%s\n" $blob .gitattributes | git mktree) && + test_when_finished "remove_object $tree" && + cat >expected <<-EOF && + error in blob $blob: gitattributesLarge: .gitattributes too large to parse + EOF + test_must_fail git fsck --no-dangling >actual 2>&1 && + test_cmp expected actual +' + test_done From b7b37a33711e64bf580ef3141878b12a36e28833 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 13 Dec 2022 20:56:43 +0900 Subject: [PATCH 27/30] Git 2.30.7 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.30.7.txt | 86 +++++++++++++++++++++++++++++++ GIT-VERSION-GEN | 2 +- RelNotes | 2 +- 3 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 Documentation/RelNotes/2.30.7.txt diff --git a/Documentation/RelNotes/2.30.7.txt b/Documentation/RelNotes/2.30.7.txt new file mode 100644 index 00000000000..285beed2322 --- /dev/null +++ b/Documentation/RelNotes/2.30.7.txt @@ -0,0 +1,86 @@ +Git v2.30.7 Release Notes +========================= + +This release addresses the security issues CVE-2022-41903 and +CVE-2022-23521. + + +Fixes since v2.30.6 +------------------- + + * CVE-2022-41903: + + git log has the ability to display commits using an arbitrary + format with its --format specifiers. This functionality is also + exposed to git archive via the export-subst gitattribute. + + When processing the padding operators (e.g., %<(, %<|(, %>(, + %>>(, or %><( ), an integer overflow can occur in + pretty.c::format_and_pad_commit() where a size_t is improperly + stored as an int, and then added as an offset to a subsequent + memcpy() call. + + This overflow can be triggered directly by a user running a + command which invokes the commit formatting machinery (e.g., git + log --format=...). It may also be triggered indirectly through + git archive via the export-subst mechanism, which expands format + specifiers inside of files within the repository during a git + archive. + + This integer overflow can result in arbitrary heap writes, which + may result in remote code execution. + +* CVE-2022-23521: + + gitattributes are a mechanism to allow defining attributes for + paths. These attributes can be defined by adding a `.gitattributes` + file to the repository, which contains a set of file patterns and + the attributes that should be set for paths matching this pattern. + + When parsing gitattributes, multiple integer overflows can occur + when there is a huge number of path patterns, a huge number of + attributes for a single pattern, or when the declared attribute + names are huge. + + These overflows can be triggered via a crafted `.gitattributes` file + that may be part of the commit history. Git silently splits lines + longer than 2KB when parsing gitattributes from a file, but not when + parsing them from the index. Consequentially, the failure mode + depends on whether the file exists in the working tree, the index or + both. + + This integer overflow can result in arbitrary heap reads and writes, + which may result in remote code execution. + +Credit for finding CVE-2022-41903 goes to Joern Schneeweisz of GitLab. +An initial fix was authored by Markus Vervier of X41 D-Sec. Credit for +finding CVE-2022-23521 goes to Markus Vervier and Eric Sesterhenn of X41 +D-Sec. This work was sponsored by OSTIF. + +The proposed fixes have been polished and extended to cover additional +findings by Patrick Steinhardt of GitLab, with help from others on the +Git security mailing list. + +Patrick Steinhardt (21): + attr: fix overflow when upserting attribute with overly long name + attr: fix out-of-bounds read with huge attribute names + attr: fix integer overflow when parsing huge attribute names + attr: fix out-of-bounds write when parsing huge number of attributes + attr: fix out-of-bounds read with unreasonable amount of patterns + attr: fix integer overflow with more than INT_MAX macros + attr: harden allocation against integer overflows + attr: fix silently splitting up lines longer than 2048 bytes + attr: ignore attribute lines exceeding 2048 bytes + attr: ignore overly large gitattributes files + pretty: fix out-of-bounds write caused by integer overflow + pretty: fix out-of-bounds read when left-flushing with stealing + pretty: fix out-of-bounds read when parsing invalid padding format + pretty: fix adding linefeed when placeholder is not expanded + pretty: fix integer overflow in wrapping format + utf8: fix truncated string lengths in `utf8_strnwidth()` + utf8: fix returning negative string width + utf8: fix overflow when returning string width + utf8: fix checking for glyph width in `strbuf_utf8_replace()` + utf8: refactor `strbuf_utf8_replace` to not rely on preallocated buffer + pretty: restrict input lengths for padding and wrapping formats + diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 40fa0b5255b..9ab3517e295 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.30.6 +DEF_VER=v2.30.7 LF=' ' diff --git a/RelNotes b/RelNotes index 018a28eb07c..253d84ff9d4 120000 --- a/RelNotes +++ b/RelNotes @@ -1 +1 @@ -Documentation/RelNotes/2.30.6.txt \ No newline at end of file +Documentation/RelNotes/2.30.7.txt \ No newline at end of file From 82689d5e5d3f41da2ab1fbf9fbe7aacfd6da74c1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 13 Dec 2022 21:04:03 +0900 Subject: [PATCH 28/30] Git 2.31.6 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.31.6.txt | 5 +++++ GIT-VERSION-GEN | 2 +- RelNotes | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 Documentation/RelNotes/2.31.6.txt diff --git a/Documentation/RelNotes/2.31.6.txt b/Documentation/RelNotes/2.31.6.txt new file mode 100644 index 00000000000..425a51875a1 --- /dev/null +++ b/Documentation/RelNotes/2.31.6.txt @@ -0,0 +1,5 @@ +Git v2.31.6 Release Notes +========================= + +This release merges the security fix that appears in v2.30.7; see +the release notes for that version for details. diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 91a5ae77393..7e159104b2a 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.31.5 +DEF_VER=v2.31.6 LF=' ' diff --git a/RelNotes b/RelNotes index 6ed6c0c0144..e25264ca354 120000 --- a/RelNotes +++ b/RelNotes @@ -1 +1 @@ -Documentation/RelNotes/2.31.5.txt \ No newline at end of file +Documentation/RelNotes/2.31.6.txt \ No newline at end of file From d96ea538e8dd0fcf381089a3b09c0a9af3617351 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 13 Dec 2022 21:10:27 +0900 Subject: [PATCH 29/30] Git 2.32.5 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.32.5.txt | 8 ++++++++ GIT-VERSION-GEN | 2 +- RelNotes | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 Documentation/RelNotes/2.32.5.txt diff --git a/Documentation/RelNotes/2.32.5.txt b/Documentation/RelNotes/2.32.5.txt new file mode 100644 index 00000000000..a8cad1a05b7 --- /dev/null +++ b/Documentation/RelNotes/2.32.5.txt @@ -0,0 +1,8 @@ +Git v2.32.5 Release Notes +========================= + +This release merges the security fix that appears in v2.30.7; see +the release notes for that version for details. + +In addition, included are additional code for "git fsck" to check +for questionable .gitattributes files. diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 076cdd3151c..3d2538de853 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.32.4 +DEF_VER=v2.32.5 LF=' ' diff --git a/RelNotes b/RelNotes index 2626c7807b4..e60115fd82a 120000 --- a/RelNotes +++ b/RelNotes @@ -1 +1 @@ -Documentation/RelNotes/2.32.4.txt \ No newline at end of file +Documentation/RelNotes/2.32.5.txt \ No newline at end of file From 7fe9bf55b84d2610a7ac09893b25ef188f145a21 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 13 Dec 2022 21:13:48 +0900 Subject: [PATCH 30/30] Git 2.33.6 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.33.6.txt | 5 +++++ GIT-VERSION-GEN | 2 +- RelNotes | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 Documentation/RelNotes/2.33.6.txt diff --git a/Documentation/RelNotes/2.33.6.txt b/Documentation/RelNotes/2.33.6.txt new file mode 100644 index 00000000000..b63e4e62564 --- /dev/null +++ b/Documentation/RelNotes/2.33.6.txt @@ -0,0 +1,5 @@ +Git v2.33.6 Release Notes +========================= + +This release merges the security fix that appears in v2.30.7; see +the release notes for that version for details. diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index d7948f16534..08677a66f5f 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.33.5 +DEF_VER=v2.33.6 LF=' ' diff --git a/RelNotes b/RelNotes index 9589e5e0e91..f0458d7556c 120000 --- a/RelNotes +++ b/RelNotes @@ -1 +1 @@ -Documentation/RelNotes/2.33.5.txt \ No newline at end of file +Documentation/RelNotes/2.33.6.txt \ No newline at end of file