From 23707811c56a7756cbd6188e510f0a486c35c929 Mon Sep 17 00:00:00 2001
From: Junio C Hamano
Date: Wed, 2 Jan 2008 01:50:11 -0800
Subject: [PATCH] diff: do not chomp hunk-header in the middle of a character

We truncate hunk-header line at 80 bytes, but that 80th byte
could be in the middle of a character, which is bad.

This uses pick_one_utf8_char() function to make sure we do not
cut a character in the middle.

This assumes that the internal representation of the text is
UTF-8. This needs to be extended in the future but the optimal
direction has not been decided yet.

Signed-off-by: Junio C Hamano
---
 diff.c                 | 25 ++++++++++++++++++++++++
 t/t4025-hunk-header.sh | 44 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100755 t/t4025-hunk-header.sh

diff --git a/diff.c b/diff.c
index 2c78d74a42..b18c140e3d 100644
--- a/diff.c
+++ b/diff.c
@@ -10,6 +10,7 @@
 #include "color.h"
 #include "attr.h"
 #include "run-command.h"
+#include "utf8.h"
 
 #ifdef NO_FAST_WORKING_DIRECTORY
 #define FAST_WORKING_DIRECTORY 0
@@ -469,10 +470,13 @@ static void diff_words_show(struct diff_words_data *diff_words)
 	}
 }
 
+typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);
+
 struct emit_callback {
 	struct xdiff_emit_state xm;
 	int nparents, color_diff;
 	unsigned ws_rule;
+	sane_truncate_fn truncate;
 	const char **label_path;
 	struct diff_words_data *diff_words;
 	int *found_changesp;
@@ -525,6 +529,24 @@ static void emit_add_line(const char *reset, struct emit_callback *ecbdata, cons
 	}
 }
 
+static unsigned long sane_truncate_line(struct emit_callback *ecb, char *line, unsigned long len)
+{
+	const char *cp;
+	unsigned long allot;
+	size_t l = len;
+
+	if (ecb->truncate)
+		return ecb->truncate(line, len);
+	cp = line;
+	allot = l;
+	while (0 < l) {
+		(void) utf8_width(&cp, &l);
+		if (!cp)
+			break; /* truncated in the middle? */
+	}
+	return allot - l;
+}
+
 static void fn_out_consume(void *priv, char *line, unsigned long len)
 {
 	int i;
@@ -555,8 +577,11 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
 		;
 	if (2 <= i && i < len && line[i] == ' ') {
 		ecbdata->nparents = i - 1;
+		len = sane_truncate_line(ecbdata, line, len);
 		emit_line(diff_get_color(ecbdata->color_diff, DIFF_FRAGINFO),
 			  reset, line, len);
+		if (line[len-1] != '\n')
+			putchar('\n');
 		return;
 	}
 
diff --git a/t/t4025-hunk-header.sh b/t/t4025-hunk-header.sh
new file mode 100755
index 0000000000..9ba06b74ce
--- /dev/null
+++ b/t/t4025-hunk-header.sh
@@ -0,0 +1,44 @@
+#!/bin/sh
+
+test_description='diff hunk header truncation'
+
+. ./test-lib.sh
+
+N='日本語'
+N1='日'
+N2='日本'
+NS="$N$N$N$N$N$N$N$N$N$N$N$N$N"
+
+test_expect_success setup '
+
+	(
+		echo "A $NS"
+		for c in B C D E F G H I J K
+		do
+			echo " $c"
+		done
+		echo "L $NS"
+		for c in M N O P Q R S T U V
+		do
+			echo " $c"
+		done
+	) >file &&
+	git add file &&
+
+	sed -e "/^ [EP]/s/$/ modified/" <file >file+ &&
+	mv file+ file
+
+'
+
+test_expect_success 'hunk header truncation with an overly long line' '
+
+	git diff | sed -n -e "s/^.*@@//p" >actual &&
+	(
+		echo " A $N$N$N$N$N$N$N$N$N2"
+		echo " L $N$N$N$N$N$N$N$N$N1"
+	) >expected &&
+	diff -u actual expected
+
+'
+
+test_done