apply --unidiff-zero: loosen sanity checks for --unidiff=0 patches

In "git-apply", we have a few sanity checks and heuristics that
expects that the patch fed to us is a unified diff with at least
one line of context.

 * When there is no leading context line in a hunk, the hunk
   must apply at the beginning of the preimage.  Similarly, no
   trailing context means that the hunk is anchored at the end.

 * We learn a patch deletes the file from a hunk that has no
   resulting line (i.e. all lines are prefixed with '-') if it
   has not otherwise been known if the patch deletes the file.
   Similarly, no old line means the file is being created.

And we declare an error condition when the file created by a
creation patch already exists, and/or when a deletion patch
still leaves content in the file.

These sanity checks are good safety measures, but breaks down
when people feed a diff generated with --unified=0.  This was
recently noticed first by Matthew Wilcox and Gerrit Pape.

This adds a new flag, --unified-zero, to allow bypassing these
checks.  If you are in control of the patch generation process,
you should not use --unified=0 patch and fix it up with this
flag; rather you should try work with a patch with context.  But
if all you have to work with is a patch without context, this
flag may come handy as the last resort.

Signed-off-by: Junio C Hamano <junkio@cox.net>
This commit is contained in:
Junio C Hamano 2006-09-17 01:04:24 -07:00
parent 8aac4b45f3
commit 4be609625e
3 changed files with 197 additions and 34 deletions

View File

@ -27,6 +27,7 @@ static const char *prefix;
static int prefix_length = -1;
static int newfd = -1;
static int unidiff_zero;
static int p_value = 1;
static int check_index;
static int write_index;
@ -854,11 +855,10 @@ static int find_header(char *line, unsigned long size, int *hdrsize, struct patc
}
/*
* Parse a unified diff. Note that this really needs
* to parse each fragment separately, since the only
* way to know the difference between a "---" that is
* part of a patch, and a "---" that starts the next
* patch is to look at the line counts..
* Parse a unified diff. Note that this really needs to parse each
* fragment separately, since the only way to know the difference
* between a "---" that is part of a patch, and a "---" that starts
* the next patch is to look at the line counts..
*/
static int parse_fragment(char *line, unsigned long size, struct patch *patch, struct fragment *fragment)
{
@ -875,31 +875,14 @@ static int parse_fragment(char *line, unsigned long size, struct patch *patch, s
leading = 0;
trailing = 0;
if (patch->is_new < 0) {
patch->is_new = !oldlines;
if (!oldlines)
patch->old_name = NULL;
}
if (patch->is_delete < 0) {
patch->is_delete = !newlines;
if (!newlines)
patch->new_name = NULL;
}
if (patch->is_new && oldlines)
return error("new file depends on old contents");
if (patch->is_delete != !newlines) {
if (newlines)
return error("deleted file still has contents");
fprintf(stderr, "** warning: file %s becomes empty but is not deleted\n", patch->new_name);
}
/* Parse the thing.. */
line += len;
size -= len;
linenr++;
added = deleted = 0;
for (offset = len; size > 0; offset += len, size -= len, line += len, linenr++) {
for (offset = len;
0 < size;
offset += len, size -= len, line += len, linenr++) {
if (!oldlines && !newlines)
break;
len = linelen(line, size);
@ -972,12 +955,18 @@ static int parse_fragment(char *line, unsigned long size, struct patch *patch, s
patch->lines_added += added;
patch->lines_deleted += deleted;
if (0 < patch->is_new && oldlines)
return error("new file depends on old contents");
if (0 < patch->is_delete && newlines)
return error("deleted file still has contents");
return offset;
}
static int parse_single_patch(char *line, unsigned long size, struct patch *patch)
{
unsigned long offset = 0;
unsigned long oldlines = 0, newlines = 0, context = 0;
struct fragment **fragp = &patch->fragments;
while (size > 4 && !memcmp(line, "@@ -", 4)) {
@ -988,9 +977,11 @@ static int parse_single_patch(char *line, unsigned long size, struct patch *patc
len = parse_fragment(line, size, patch, fragment);
if (len <= 0)
die("corrupt patch at line %d", linenr);
fragment->patch = line;
fragment->size = len;
oldlines += fragment->oldlines;
newlines += fragment->newlines;
context += fragment->leading + fragment->trailing;
*fragp = fragment;
fragp = &fragment->next;
@ -999,6 +990,46 @@ static int parse_single_patch(char *line, unsigned long size, struct patch *patc
line += len;
size -= len;
}
/*
* If something was removed (i.e. we have old-lines) it cannot
* be creation, and if something was added it cannot be
* deletion. However, the reverse is not true; --unified=0
* patches that only add are not necessarily creation even
* though they do not have any old lines, and ones that only
* delete are not necessarily deletion.
*
* Unfortunately, a real creation/deletion patch do _not_ have
* any context line by definition, so we cannot safely tell it
* apart with --unified=0 insanity. At least if the patch has
* more than one hunk it is not creation or deletion.
*/
if (patch->is_new < 0 &&
(oldlines || (patch->fragments && patch->fragments->next)))
patch->is_new = 0;
if (patch->is_delete < 0 &&
(newlines || (patch->fragments && patch->fragments->next)))
patch->is_delete = 0;
if (!unidiff_zero || context) {
/* If the user says the patch is not generated with
* --unified=0, or if we have seen context lines,
* then not having oldlines means the patch is creation,
* and not having newlines means the patch is deletion.
*/
if (patch->is_new < 0 && !oldlines)
patch->is_new = 1;
if (patch->is_delete < 0 && !newlines)
patch->is_delete = 1;
}
if (0 < patch->is_new && oldlines)
die("new file %s depends on old contents", patch->new_name);
if (0 < patch->is_delete && newlines)
die("deleted file %s still has contents", patch->old_name);
if (!patch->is_delete && !newlines && context)
fprintf(stderr, "** warning: file %s becomes empty but "
"is not deleted\n", patch->new_name);
return offset;
}
@ -1556,9 +1587,19 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag, i
/*
* If we don't have any leading/trailing data in the patch,
* we want it to match at the beginning/end of the file.
*
* But that would break if the patch is generated with
* --unified=0; sane people wouldn't do that to cause us
* trouble, but we try to please not so sane ones as well.
*/
match_beginning = !leading && (frag->oldpos == 1);
match_end = !trailing;
if (unidiff_zero) {
match_beginning = (!leading && !frag->oldpos);
match_end = 0;
}
else {
match_beginning = !leading && (frag->oldpos == 1);
match_end = !trailing;
}
lines = 0;
pos = frag->newpos;
@ -1804,7 +1845,7 @@ static int apply_data(struct patch *patch, struct stat *st, struct cache_entry *
patch->result = desc.buffer;
patch->resultsize = desc.size;
if (patch->is_delete && patch->resultsize)
if (0 < patch->is_delete && patch->resultsize)
return error("removal patch leaves file contents");
return 0;
@ -1876,7 +1917,7 @@ static int check_patch(struct patch *patch, struct patch *prev_patch)
old_name, st_mode, patch->old_mode);
}
if (new_name && prev_patch && prev_patch->is_delete &&
if (new_name && prev_patch && 0 < prev_patch->is_delete &&
!strcmp(prev_patch->old_name, new_name))
/* A type-change diff is always split into a patch to
* delete old, immediately followed by a patch to
@ -1889,7 +1930,8 @@ static int check_patch(struct patch *patch, struct patch *prev_patch)
else
ok_if_exists = 0;
if (new_name && (patch->is_new | patch->is_rename | patch->is_copy)) {
if (new_name &&
((0 < patch->is_new) | (0 < patch->is_rename) | patch->is_copy)) {
if (check_index &&
cache_name_pos(new_name, strlen(new_name)) >= 0 &&
!ok_if_exists)
@ -1906,7 +1948,7 @@ static int check_patch(struct patch *patch, struct patch *prev_patch)
return error("%s: %s", new_name, strerror(errno));
}
if (!patch->new_mode) {
if (patch->is_new)
if (0 < patch->is_new)
patch->new_mode = S_IFREG | 0644;
else
patch->new_mode = patch->old_mode;
@ -1957,7 +1999,7 @@ static void show_index_list(struct patch *list)
const char *name;
name = patch->old_name ? patch->old_name : patch->new_name;
if (patch->is_new)
if (0 < patch->is_new)
sha1_ptr = null_sha1;
else if (get_sha1(patch->old_sha1_prefix, sha1))
die("sha1 information is lacking or useless (%s).",
@ -2543,6 +2585,10 @@ int cmd_apply(int argc, const char **argv, const char *prefix)
apply_in_reverse = 1;
continue;
}
if (!strcmp(arg, "--unidiff-zero")) {
unidiff_zero = 1;
continue;
}
if (!strcmp(arg, "--reject")) {
apply = apply_with_reject = apply_verbosely = 1;
continue;

View File

@ -37,7 +37,9 @@ test_expect_success setup '
git branch skip-merge skip-reference
'
test_expect_failure 'rebase with git am -3 (default)' 'git rebase master'
test_expect_failure 'rebase with git am -3 (default)' '
git rebase master
'
test_expect_success 'rebase --skip with am -3' '
git reset --hard HEAD &&

115
t/t4104-apply-boundary.sh Executable file
View File

@ -0,0 +1,115 @@
#!/bin/sh
#
# Copyright (c) 2005 Junio C Hamano
#
test_description='git-apply boundary tests
'
. ./test-lib.sh
L="c d e f g h i j k l m n o p q r s t u v w x"
test_expect_success setup '
for i in b '"$L"' y
do
echo $i
done >victim &&
cat victim >original &&
git update-index --add victim &&
: add to the head
for i in a b '"$L"' y
do
echo $i
done >victim &&
cat victim >add-a-expect &&
git diff victim >add-a-patch.with &&
git diff --unified=0 >add-a-patch.without &&
: modify at the head
for i in a '"$L"' y
do
echo $i
done >victim &&
cat victim >mod-a-expect &&
git diff victim >mod-a-patch.with &&
git diff --unified=0 >mod-a-patch.without &&
: remove from the head
for i in '"$L"' y
do
echo $i
done >victim &&
cat victim >del-a-expect &&
git diff victim >del-a-patch.with
git diff --unified=0 >del-a-patch.without &&
: add to the tail
for i in b '"$L"' y z
do
echo $i
done >victim &&
cat victim >add-z-expect &&
git diff victim >add-z-patch.with &&
git diff --unified=0 >add-z-patch.without &&
: modify at the tail
for i in a '"$L"' y
do
echo $i
done >victim &&
cat victim >mod-z-expect &&
git diff victim >mod-z-patch.with &&
git diff --unified=0 >mod-z-patch.without &&
: remove from the tail
for i in b '"$L"'
do
echo $i
done >victim &&
cat victim >del-z-expect &&
git diff victim >del-z-patch.with
git diff --unified=0 >del-z-patch.without &&
: done
'
for with in with without
do
case "$with" in
with) u= ;;
without) u='--unidiff-zero ' ;;
esac
for kind in add-a add-z mod-a mod-z del-a del-z
do
test_expect_success "apply $kind-patch $with context" '
cat original >victim &&
git update-index victim &&
git apply --index '"$u$kind-patch.$with"' || {
cat '"$kind-patch.$with"'
(exit 1)
} &&
diff -u '"$kind"'-expect victim
'
done
done
for kind in add-a add-z mod-a mod-z del-a del-z
do
rm -f $kind-ng.without
sed -e "s/^diff --git /diff /" \
-e '/^index /d' \
<$kind-patch.without >$kind-ng.without
test_expect_success "apply non-git $kind-patch without context" '
cat original >victim &&
git update-index victim &&
git apply --unidiff-zero --index '"$kind-ng.without"' || {
cat '"$kind-ng.without"'
(exit 1)
} &&
diff -u '"$kind"'-expect victim
'
done
test_done