Merge branch 'ag/blame-encoding'

* ag/blame-encoding:
  builtin-blame: Reencode commit messages according to git-log rules.
This commit is contained in:
Junio C Hamano 2008-11-02 16:36:30 -08:00
commit 0eb5ebc1d8
9 changed files with 136 additions and 14 deletions

View File

@ -49,6 +49,13 @@ of lines before or after the line given by <start>.
Show the result incrementally in a format designed for Show the result incrementally in a format designed for
machine consumption. machine consumption.
--encoding=<encoding>::
Specifies the encoding used to output author names
and commit summaries. Setting it to `none` makes blame
output unconverted data. For more information see the
discussion about encoding in the linkgit:git-log[1]
manual page.
--contents <file>:: --contents <file>::
When <rev> is not specified, the command annotates the When <rev> is not specified, the command annotates the
changes starting backwards from the working tree copy. changes starting backwards from the working tree copy.

View File

@ -37,9 +37,9 @@ of `i18n.commitencoding` in its `encoding` header. This is to
help other people who look at them later. Lack of this header help other people who look at them later. Lack of this header
implies that the commit log message is encoded in UTF-8. implies that the commit log message is encoded in UTF-8.
. 'git-log', 'git-show' and friends looks at the `encoding` . 'git-log', 'git-show', 'git-blame' and friends look at the
header of a commit object, and tries to re-code the log `encoding` header of a commit object, and try to re-code the
message into UTF-8 unless otherwise specified. You can log message into UTF-8 unless otherwise specified. You can
specify the desired output encoding with specify the desired output encoding with
`i18n.logoutputencoding` in `.git/config` file, like this: `i18n.logoutputencoding` in `.git/config` file, like this:
+ +

View File

@ -1431,7 +1431,7 @@ static void get_commit_info(struct commit *commit,
int detailed) int detailed)
{ {
int len; int len;
char *tmp, *endp; char *tmp, *endp, *reencoded, *message;
static char author_buf[1024]; static char author_buf[1024];
static char committer_buf[1024]; static char committer_buf[1024];
static char summary_buf[1024]; static char summary_buf[1024];
@ -1449,24 +1449,29 @@ static void get_commit_info(struct commit *commit,
die("Cannot read commit %s", die("Cannot read commit %s",
sha1_to_hex(commit->object.sha1)); sha1_to_hex(commit->object.sha1));
} }
reencoded = reencode_commit_message(commit, NULL);
message = reencoded ? reencoded : commit->buffer;
ret->author = author_buf; ret->author = author_buf;
get_ac_line(commit->buffer, "\nauthor ", get_ac_line(message, "\nauthor ",
sizeof(author_buf), author_buf, &ret->author_mail, sizeof(author_buf), author_buf, &ret->author_mail,
&ret->author_time, &ret->author_tz); &ret->author_time, &ret->author_tz);
if (!detailed) if (!detailed) {
free(reencoded);
return; return;
}
ret->committer = committer_buf; ret->committer = committer_buf;
get_ac_line(commit->buffer, "\ncommitter ", get_ac_line(message, "\ncommitter ",
sizeof(committer_buf), committer_buf, &ret->committer_mail, sizeof(committer_buf), committer_buf, &ret->committer_mail,
&ret->committer_time, &ret->committer_tz); &ret->committer_time, &ret->committer_tz);
ret->summary = summary_buf; ret->summary = summary_buf;
tmp = strstr(commit->buffer, "\n\n"); tmp = strstr(message, "\n\n");
if (!tmp) { if (!tmp) {
error_out: error_out:
sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1)); sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
free(reencoded);
return; return;
} }
tmp += 2; tmp += 2;
@ -1478,6 +1483,7 @@ static void get_commit_info(struct commit *commit,
goto error_out; goto error_out;
memcpy(summary_buf, tmp, len); memcpy(summary_buf, tmp, len);
summary_buf[len] = 0; summary_buf[len] = 0;
free(reencoded);
} }
/* /*

View File

@ -65,6 +65,8 @@ enum cmit_fmt {
extern int non_ascii(int); extern int non_ascii(int);
struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */ struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */
/*
 * Re-encode the message of `commit` for output.  The target encoding is
 * chosen from the log output encoding, then the commit encoding, then
 * "utf-8"; when `encoding_p` is non-NULL the chosen name is stored there.
 * Returns the result of logmsg_reencode(); the visible callers fall back
 * to commit->buffer when it is NULL and free() it afterwards.
 */
extern char *reencode_commit_message(const struct commit *commit,
const char **encoding_p);
extern void get_commit_format(const char *arg, struct rev_info *); extern void get_commit_format(const char *arg, struct rev_info *);
extern void format_commit_message(const struct commit *commit, extern void format_commit_message(const struct commit *commit,
const void *format, struct strbuf *sb, const void *format, struct strbuf *sb,

View File

@ -783,6 +783,20 @@ void pp_remainder(enum cmit_fmt fmt,
} }
} }
/*
 * Re-encode the message of `commit` into the encoding used for log output.
 *
 * The target encoding is git_log_output_encoding when it is set, otherwise
 * git_commit_encoding, otherwise the literal "utf-8".  If `encoding_p` is
 * non-NULL, the name of the chosen encoding is stored through it so the
 * caller can report or reuse it.
 *
 * The return value is passed straight through from logmsg_reencode().
 * NOTE(review): callers in this change treat NULL as "use commit->buffer
 * unchanged" and free() a non-NULL result -- confirm against
 * logmsg_reencode() itself.
 */
char *reencode_commit_message(const struct commit *commit, const char **encoding_p)
{
	const char *target = git_log_output_encoding;

	if (!target)
		target = git_commit_encoding;
	if (!target)
		target = "utf-8";

	if (encoding_p)
		*encoding_p = target;

	return logmsg_reencode(commit, target);
}
void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit, void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
struct strbuf *sb, int abbrev, struct strbuf *sb, int abbrev,
const char *subject, const char *after_subject, const char *subject, const char *after_subject,
@ -799,12 +813,7 @@ void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
return; return;
} }
encoding = (git_log_output_encoding reencoded = reencode_commit_message(commit, &encoding);
? git_log_output_encoding
: git_commit_encoding);
if (!encoding)
encoding = "utf-8";
reencoded = logmsg_reencode(commit, encoding);
if (reencoded) { if (reencoded) {
msg = reencoded; msg = reencoded;
} }

92
t/t8005-blame-i18n.sh Executable file
View File

@ -0,0 +1,92 @@
#!/bin/sh
#
# Checks that "git blame --incremental" re-encodes author names and commit
# summaries according to i18n.commitencoding, i18n.logoutputencoding and
# the --encoding option.

test_description='git blame encoding conversion'
. ./test-lib.sh

# Each fixture defines <ENC>_NAME and <ENC>_MSG, holding an author name and
# a commit message written in that encoding.
. "$TEST_DIRECTORY"/t8005/utf8.txt
. "$TEST_DIRECTORY"/t8005/cp1251.txt
. "$TEST_DIRECTORY"/t8005/sjis.txt

# Build a three-line file where each line is introduced by a commit whose
# author and message are stored in a different encoding.
test_expect_success 'setup the repository' '
	# Create the file
	echo "UTF-8 LINE" >file &&
	git add file &&
	git commit --author "$UTF8_NAME <utf8@localhost>" -m "$UTF8_MSG" &&

	echo "CP1251 LINE" >>file &&
	git add file &&
	git config i18n.commitencoding cp1251 &&
	git commit --author "$CP1251_NAME <cp1251@localhost>" -m "$CP1251_MSG" &&

	echo "SJIS LINE" >>file &&
	git add file &&
	git config i18n.commitencoding shift-jis &&
	git commit --author "$SJIS_NAME <sjis@localhost>" -m "$SJIS_MSG"
'

# i18n.commitencoding was left at shift-jis by the setup and no output
# encoding is configured yet, so every entry is expected in Shift-JIS.
cat >expected <<EOF
author $SJIS_NAME
summary $SJIS_MSG
author $SJIS_NAME
summary $SJIS_MSG
author $SJIS_NAME
summary $SJIS_MSG
EOF

# In every test below, blame output is written to a file before filtering
# so that a failing "git blame" is not masked by the exit status of grep.
# The filter uses "grep -E" because alternation is not available in POSIX
# basic regular expressions ("\|" is a GNU extension).
test_expect_success \
	'blame respects i18n.commitencoding' '
	git blame --incremental file >output &&
	grep -E "^(author|summary) " output >actual &&
	test_cmp expected actual
'

cat >expected <<EOF
author $CP1251_NAME
summary $CP1251_MSG
author $CP1251_NAME
summary $CP1251_MSG
author $CP1251_NAME
summary $CP1251_MSG
EOF

test_expect_success \
	'blame respects i18n.logoutputencoding' '
	git config i18n.logoutputencoding cp1251 &&
	git blame --incremental file >output &&
	grep -E "^(author|summary) " output >actual &&
	test_cmp expected actual
'

# i18n.logoutputencoding is still cp1251 from the previous test; the
# command-line option is expected to take precedence over it.
cat >expected <<EOF
author $UTF8_NAME
summary $UTF8_MSG
author $UTF8_NAME
summary $UTF8_MSG
author $UTF8_NAME
summary $UTF8_MSG
EOF

test_expect_success \
	'blame respects --encoding=utf-8' '
	git blame --incremental --encoding=utf-8 file >output &&
	grep -E "^(author|summary) " output >actual &&
	test_cmp expected actual
'

# With --encoding=none each commit is reported in the encoding it was
# recorded in, so the three entries differ.
cat >expected <<EOF
author $SJIS_NAME
summary $SJIS_MSG
author $CP1251_NAME
summary $CP1251_MSG
author $UTF8_NAME
summary $UTF8_MSG
EOF

test_expect_success \
	'blame respects --encoding=none' '
	git blame --incremental --encoding=none file >output &&
	grep -E "^(author|summary) " output >actual &&
	test_cmp expected actual
'

test_done

2
t/t8005/cp1251.txt Normal file
View File

@ -0,0 +1,2 @@
CP1251_NAME="Иван Петрович Сидоров"
CP1251_MSG="Тестовое сообщение"

2
t/t8005/sjis.txt Normal file
View File

@ -0,0 +1,2 @@
SJIS_NAME="Иван Петрович Сидоров"
SJIS_MSG="Тестовое сообщение"

2
t/t8005/utf8.txt Normal file
View File

@ -0,0 +1,2 @@
UTF8_NAME="Иван Петрович Сидоров"
UTF8_MSG="Тестовое сообщение"