Merge branch 'rs/subtree-fixes'

Various subtree fixes.

* rs/subtree-fixes:
  subtree: performance improvement for finding unexpected parent commits
  subtree: improve decision on merges kept in split
  subtree: use commits before rejoins for splits
  subtree: make --ignore-joins pay attention to adds
  subtree: refactor split of a commit into standalone method
This commit is contained in:
Junio C Hamano 2018-10-26 14:22:08 +09:00
commit e7b07376e5

View File

@ -231,12 +231,14 @@ cache_miss () {
}
# Ensure the parents of a commit have been handled before the commit itself.
# Every revision reported by cache_miss that has no readable
# "$cachedir/notree/<rev>" marker is handed to process_split_commit.
#   $1 - parent revisions to check (the visible caller passes them as a
#        single space-separated word)
#   $2 - indent level of the caller; processing continues at $2 + 1
# Sets the non-local variables "missed" and "miss".
check_parents () {
# NOTE(review): this assignment is overwritten by the next line before it
# is ever read; it looks like the pre-change side of a diff hunk - confirm
# which of the two lines is meant to remain.
missed=$(cache_miss "$@")
missed=$(cache_miss "$1")
# A non-zero indent makes process_split_commit look up the parents of the
# revision itself (see its "indent -eq 0" test).
local indent=$(($2 + 1))
# Deliberately unquoted: $missed is split into one revision per word.
for miss in $missed
do
# Revisions with a notree marker are skipped.
if ! test -r "$cachedir/notree/$miss"
then
debug " incorrect order: $miss"
# Empty parents argument: process_split_commit fetches them from the repo.
process_split_commit "$miss" "" "$indent"
fi
done
}
@ -340,7 +342,12 @@ find_existing_splits () {
revs="$2"
main=
sub=
git log --grep="^git-subtree-dir: $dir/*\$" \
local grep_format="^git-subtree-dir: $dir/*\$"
if test -n "$ignore_joins"
then
grep_format="^Add '$dir/' from commit '"
fi
git log --grep="$grep_format" \
--no-show-signature --pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
while read a b junk
do
@ -534,6 +541,7 @@ copy_or_skip () {
nonidentical=
p=
gotparents=
copycommit=
for parent in $newparents
do
ptree=$(toptree_for_commit $parent) || exit $?
@ -541,7 +549,24 @@ copy_or_skip () {
if test "$ptree" = "$tree"
then
# an identical parent could be used in place of this rev.
identical="$parent"
if test -n "$identical"
then
# if a previous identical parent was found, check whether
# one is already an ancestor of the other
mergebase=$(git merge-base $identical $parent)
if test "$identical" = "$mergebase"
then
# current identical commit is an ancestor of parent
identical="$parent"
elif test "$parent" != "$mergebase"
then
# no common history; commit must be copied
copycommit=1
fi
else
# first identical parent detected
identical="$parent"
fi
else
nonidentical="$parent"
fi
@ -564,7 +589,6 @@ copy_or_skip () {
fi
done
copycommit=
if test -n "$identical" && test -n "$nonidentical"
then
extras=$(git rev-list --count $identical..$nonidentical)
@ -598,6 +622,58 @@ ensure_valid_ref_format () {
die "'$1' does not look like a ref"
}
# Process one commit of the split: look up or create the commit that
# corresponds to it and record the mapping through cache_set.
#
# Arguments:
#   $1 - rev:     the commit to process
#   $2 - parents: parents of $1; only used when $3 is 0
#   $3 - indent:  0 when called for a commit coming from the main revision
#                 loop; non-zero when called from check_parents, in which
#                 case the parents are fetched from the repository
# Globals read:    dir, revmax
# Globals written: revcount, extracount, createcount, exists, newparents,
#                  tree, newrev
# Exits with the failing command's status if the parent lookup or
# copy_or_skip fails.
process_split_commit () {
	local rev="$1"
	local parents="$2"
	local indent="$3"

	if test "$indent" -eq 0
	then
		revcount=$(($revcount + 1))
	else
		# processing commit without normal parent information;
		# fetch from repo.  A failing lookup must abort: continuing
		# with an empty parent list would treat the commit as if it
		# had no parents at all.
		parents=$(git rev-parse "$rev^@") || exit $?
		extracount=$(($extracount + 1))
	fi

	progress "$revcount/$revmax ($createcount) [$extracount]"

	debug "Processing commit: $rev"
	exists=$(cache_get "$rev")
	if test -n "$exists"
	then
		# already handled earlier; nothing to do
		debug " prior: $exists"
		return
	fi
	createcount=$(($createcount + 1))
	debug " parents: $parents"
	# check_parents runs before the cache lookup below so that parents it
	# processes are already cached when newparents is computed.
	check_parents "$parents" "$indent"
	# $parents is deliberately unquoted: one argument per parent.
	newparents=$(cache_get $parents)
	debug " newparents: $newparents"

	tree=$(subtree_for_commit "$rev" "$dir")
	debug " tree is: $tree"

	# ugly. is there no better way to tell if this is a subtree
	# vs. a mainline commit? Does it matter?
	if test -z "$tree"
	then
		set_notree "$rev"
		if test -n "$newparents"
		then
			cache_set "$rev" "$rev"
		fi
		return
	fi

	newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
	debug " newrev is: $newrev"
	cache_set "$rev" "$newrev"
	cache_set latest_new "$newrev"
	cache_set latest_old "$rev"
}
cmd_add () {
if test -e "$dir"
then
@ -689,12 +765,7 @@ cmd_split () {
done
fi
if test -n "$ignore_joins"
then
unrevs=
else
unrevs="$(find_existing_splits "$dir" "$revs")"
fi
unrevs="$(find_existing_splits "$dir" "$revs")"
# We can't restrict rev-list to only $dir here, because some of our
# parents have the $dir contents the root, and those won't match.
@ -703,45 +774,11 @@ cmd_split () {
revmax=$(eval "$grl" | wc -l)
revcount=0
createcount=0
extracount=0
eval "$grl" |
while read rev parents
do
revcount=$(($revcount + 1))
progress "$revcount/$revmax ($createcount)"
debug "Processing commit: $rev"
exists=$(cache_get "$rev")
if test -n "$exists"
then
debug " prior: $exists"
continue
fi
createcount=$(($createcount + 1))
debug " parents: $parents"
newparents=$(cache_get $parents)
debug " newparents: $newparents"
tree=$(subtree_for_commit "$rev" "$dir")
debug " tree is: $tree"
check_parents $parents
# ugly. is there no better way to tell if this is a subtree
# vs. a mainline commit? Does it matter?
if test -z "$tree"
then
set_notree "$rev"
if test -n "$newparents"
then
cache_set "$rev" "$rev"
fi
continue
fi
newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
debug " newrev is: $newrev"
cache_set "$rev" "$newrev"
cache_set latest_new "$newrev"
cache_set latest_old "$rev"
process_split_commit "$rev" "$parents" 0
done || exit $?
latest_new=$(cache_get latest_new)