fast-import: support 'encoding' commit header

Since git supports commit messages with an encoding other than UTF-8,
allow fast-import to import such commits.  This may be useful for folks
who do not want to reencode commit messages from an external system, and
may also be useful to achieve reversible history rewrites (e.g. sha1sum
<-> sha256sum transitions or subtree work) with git repositories that
have used specialized encodings in their commit history.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Elijah Newren 2019-05-13 21:30:59 -07:00 committed by Junio C Hamano
parent 32615ce762
commit 3edfcc65fd
3 changed files with 36 additions and 2 deletions

View File

@ -388,6 +388,7 @@ change to the project.
original-oid? original-oid?
('author' (SP <name>)? SP LT <email> GT SP <when> LF)? ('author' (SP <name>)? SP LT <email> GT SP <when> LF)?
'committer' (SP <name>)? SP LT <email> GT SP <when> LF 'committer' (SP <name>)? SP LT <email> GT SP <when> LF
('encoding' SP <encoding> LF)?
data data
('from' SP <commit-ish> LF)? ('from' SP <commit-ish> LF)?
('merge' SP <commit-ish> LF)? ('merge' SP <commit-ish> LF)?
@ -455,6 +456,12 @@ that was selected by the --date-format=<fmt> command-line option.
See ``Date Formats'' above for the set of supported formats, and See ``Date Formats'' above for the set of supported formats, and
their syntax. their syntax.
`encoding`
^^^^^^^^^^
The optional `encoding` command indicates the encoding of the commit
message. Most commits are UTF-8 and the encoding is omitted, but this
allows importing commit messages into git without first reencoding them.
`from` `from`
^^^^^^ ^^^^^^
The `from` command is used to specify the commit to initialize The `from` command is used to specify the commit to initialize

View File

@ -2585,6 +2585,7 @@ static void parse_new_commit(const char *arg)
struct branch *b; struct branch *b;
char *author = NULL; char *author = NULL;
char *committer = NULL; char *committer = NULL;
const char *encoding = NULL;
struct hash_list *merge_list = NULL; struct hash_list *merge_list = NULL;
unsigned int merge_count; unsigned int merge_count;
unsigned char prev_fanout, new_fanout; unsigned char prev_fanout, new_fanout;
@ -2607,6 +2608,8 @@ static void parse_new_commit(const char *arg)
} }
if (!committer) if (!committer)
die("Expected committer but didn't get one"); die("Expected committer but didn't get one");
if (skip_prefix(command_buf.buf, "encoding ", &encoding))
read_next_command();
parse_data(&msg, 0, NULL); parse_data(&msg, 0, NULL);
read_next_command(); read_next_command();
parse_from(b); parse_from(b);
@ -2670,9 +2673,13 @@ static void parse_new_commit(const char *arg)
} }
strbuf_addf(&new_data, strbuf_addf(&new_data,
"author %s\n" "author %s\n"
"committer %s\n" "committer %s\n",
"\n",
author ? author : committer, committer); author ? author : committer, committer);
if (encoding)
strbuf_addf(&new_data,
"encoding %s\n",
encoding);
strbuf_addch(&new_data, '\n');
strbuf_addbuf(&new_data, &msg); strbuf_addbuf(&new_data, &msg);
free(author); free(author);
free(committer); free(committer);

View File

@ -3299,4 +3299,24 @@ test_expect_success !MINGW 'W: get-mark & empty orphan commit with erroneous thi
sed -e s/LFs/LLL/ W-input | tr L "\n" | test_must_fail git fast-import sed -e s/LFs/LLL/ W-input | tr L "\n" | test_must_fail git fast-import
' '
###
### series X (other new features)
###
# Feed fast-import a commit that declares "encoding iso-8859-7" and whose
# message contains the single raw byte \360 (octal, 0xF0).  The test then
# checks two things:
#   1. `git cat-file -p` on the new commit still shows the raw \360 byte,
#      i.e. the message was stored without being reencoded on import;
#   2. `git log --format=%B` output contains the bytes \317\200 (0xCF 0x80),
#      which is the UTF-8 encoding of U+03C0.
# NOTE(review): presumably 0xF0 is GREEK SMALL LETTER PI in ISO-8859-7 and
# `git log` reencodes to UTF-8 using the stored header -- confirm.
test_expect_success 'X: handling encoding' '
test_tick &&
cat >input <<-INPUT_END &&
commit refs/heads/encoding
committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
encoding iso-8859-7
data <<COMMIT
INPUT_END
printf "Pi: \360\nCOMMIT\n" >>input &&
git fast-import <input &&
git cat-file -p encoding | grep $(printf "\360") &&
git log -1 --format=%B encoding | grep $(printf "\317\200")
'
test_done test_done