git/t/t4034-diff-words.sh

#!/bin/sh

test_description='word diff colors'

. ./test-lib.sh

# Select the colors used for removed (old) and added (new) text; the
# <RED> and <GREEN> tags in the expected outputs of this script stand
# for them.  The commands are chained with && so that a failure of the
# first one is not hidden by the exit status of the second.
test_expect_success setup '

	git config diff.color.old red &&
	git config diff.color.new green

'

# Filter: rewrite the color sequences found on stdin as readable tags on
# stdout, so expected outputs can be written as plain text.
#
# Every pattern begins with "." and therefore swallows whatever single
# character precedes the "[" (presumably the ESC byte of the terminal
# escape sequence), which keeps that byte out of this script.
#
# NOTE(review): "36m" is tagged <BROWN> although that code is normally
# cyan; the expect files in this script use <BROWN>, so the tag name
# must stay as it is.
decrypt_color () {
	sed '
		s/.\[1m/<WHITE>/g
		s/.\[31m/<RED>/g
		s/.\[32m/<GREEN>/g
		s/.\[36m/<BROWN>/g
		s/.\[m/<RESET>/g'
}

# Run "git diff --no-index" with the given options on the files "pre"
# and "post", pass its output through decrypt_color, and compare the
# result with the file "expect".
#
# The diff runs under test_must_fail (presumably because it exits
# non-zero when the two files differ -- confirm against git-diff docs).
# Leaves "output" and "output.decrypted" behind for inspection.
# Returns the status of the first step that fails, or of test_cmp.
word_diff () {
	test_must_fail git diff --no-index "$@" pre post >output || return
	decrypt_color <output >output.decrypted || return
	test_cmp expect output.decrypted
}

# Input files compared by word_diff.  "post" differs from "pre" in its
# first line and has two further lines appended, each preceded by a
# blank line.
cat >pre <<'EOF'
h(4)

a = b + c
EOF

cat >post <<'EOF'
h(4),hh[44]

a = b + c

aa = a

aeff = aeff * ( aaa )
EOF

# Expected (color-decoded) output for --color-words without any word
# regex: the first line is shown as one removed word followed by one
# added word, and the two appended lines are shown as added.
cat > expect <<\EOF
<WHITE>diff --git a/pre b/post<RESET>
<WHITE>index 330b04f..5ed8eff 100644<RESET>
<WHITE>--- a/pre<RESET>
<WHITE>+++ b/post<RESET>
<BROWN>@@ -1,3 +1,7 @@<RESET>
<RED>h(4)<RESET><GREEN>h(4),hh[44]<RESET>
<RESET>
a = b + c<RESET>

<GREEN>aa = a<RESET>

<GREEN>aeff = aeff * ( aaa )<RESET>
EOF

# No regex on the command line and none configured at this point.
test_expect_success 'word diff with runs of whitespace' '

	word_diff --color-words

'

# Expected output when the word regex is "[a-z]+": on the first line
# only "hh" is marked as added, and on the last line the closing " )"
# falls outside the added span.
cat > expect <<\EOF
<WHITE>diff --git a/pre b/post<RESET>
<WHITE>index 330b04f..5ed8eff 100644<RESET>
<WHITE>--- a/pre<RESET>
<WHITE>+++ b/post<RESET>
<BROWN>@@ -1,3 +1,7 @@<RESET>
h(4),<GREEN>hh<RESET>[44]
<RESET>
a = b + c<RESET>

<GREEN>aa = a<RESET>

<GREEN>aeff = aeff * ( aaa<RESET> )
EOF
# Keep a copy under a descriptive name; a later test copies it back
# into "expect".
cp expect expect.letter-runs-are-words

# The regex is passed as the optional argument of --color-words.
test_expect_success 'word diff with a regular expression' '

	word_diff --color-words="[a-z]+"

'

# Define a diff driver "testdriver" whose word regex matches one
# non-whitespace character at a time, and attach it to both input files
# through .gitattributes.
test_expect_success 'set a diff driver' '
	git config diff.testdriver.wordregex "[^[:space:]]" &&
	printf "%s\n" "pre diff=testdriver" "post diff=testdriver" >.gitattributes
'

# A regex given on the command line must take precedence over the one
# from the diff driver; "expect" still holds the "[a-z]+" result here.
test_expect_success 'option overrides .gitattributes' '
	word_diff --color-words="[a-z]+"
'

# Expected output when the regex "[^[:space:]]" of the diff driver is in
# effect: on the first line exactly the appended ",hh[44]" is marked as
# added.
cat > expect <<\EOF
<WHITE>diff --git a/pre b/post<RESET>
<WHITE>index 330b04f..5ed8eff 100644<RESET>
<WHITE>--- a/pre<RESET>
<WHITE>+++ b/post<RESET>
<BROWN>@@ -1,3 +1,7 @@<RESET>
h(4)<GREEN>,hh[44]<RESET>
<RESET>
a = b + c<RESET>

<GREEN>aa = a<RESET>

<GREEN>aeff = aeff * ( aaa )<RESET>
EOF
# Keep a copy under a descriptive name; a later test copies it back
# into "expect".
cp expect expect.non-whitespace-is-word

# No regex on the command line, so the one set for the driver is the
# only regex available.
test_expect_success 'use regex supplied by driver' '

	word_diff --color-words

'

# From here on a general diff.wordregex is configured in addition to the
# driver-specific regex set earlier.
test_expect_success 'set diff.wordregex option' '
	git config diff.wordregex "[[:alnum:]]+"
'

# Restore the expected output saved for the "[a-z]+" regex.
cp expect.letter-runs-are-words expect

# The command-line regex is expected to win over diff.wordregex.
test_expect_success 'command-line overrides config' '
	word_diff --color-words="[a-z]+"
'

# Restore the expected output saved for the driver's regex.
cp expect.non-whitespace-is-word expect

# Without a command-line regex, the driver's regex is expected to win
# over diff.wordregex.
test_expect_success '.gitattributes override config' '
	word_diff --color-words
'

# Drop the driver's regex so that only diff.wordregex remains configured.
test_expect_success 'remove diff driver regex' '
	git config --unset diff.testdriver.wordregex
'

# Expected output when diff.wordregex ("[[:alnum:]]+") is in effect: on
# the first line the added span runs from "hh" through "44", leaving the
# closing "]" outside of it.
cat > expect <<\EOF
<WHITE>diff --git a/pre b/post<RESET>
<WHITE>index 330b04f..5ed8eff 100644<RESET>
<WHITE>--- a/pre<RESET>
<WHITE>+++ b/post<RESET>
<BROWN>@@ -1,3 +1,7 @@<RESET>
h(4),<GREEN>hh[44<RESET>]
<RESET>
a = b + c<RESET>

<GREEN>aa = a<RESET>

<GREEN>aeff = aeff * ( aaa<RESET> )
EOF

# No command-line regex, and the driver regex was unset just before.
test_expect_success 'use configured regex' '
	word_diff --color-words
'

# New single-line inputs: "post" is "pre" with one more "aaa" appended.
echo 'aaa (aaa)' > pre
echo 'aaa (aaa) aaa' > post

# With the regex "a+" only the appended "aaa" is expected to be marked
# as added.
cat > expect <<\EOF
<WHITE>diff --git a/pre b/post<RESET>
<WHITE>index c29453b..be22f37 100644<RESET>
<WHITE>--- a/pre<RESET>
<WHITE>+++ b/post<RESET>
<BROWN>@@ -1 +1 @@<RESET>
aaa (aaa) <GREEN>aaa<RESET>
EOF

test_expect_success 'test parsing words for newline' '

	word_diff --color-words="a+"


'

# New single-line inputs: "post" is "pre" without its trailing ":".
echo '(:' > pre
echo '(' > post

# With the regex "." (every character is a word) the ":" is expected to
# be marked as removed after the unchanged "(".
cat > expect <<\EOF
<WHITE>diff --git a/pre b/post<RESET>
<WHITE>index 289cb9d..2d06f37 100644<RESET>
<WHITE>--- a/pre<RESET>
<WHITE>+++ b/post<RESET>
<BROWN>@@ -1 +1 @@<RESET>
(<RED>:<RESET>
EOF

test_expect_success 'test when words are only removed at the end' '

	word_diff --color-words=.

'

# Last statement of the test script.
test_done
color-words: change algorithm to allow for 0-character word boundaries Up until now, the color-words code assumed that word boundaries are identical to white space characters. Therefore, it could get away with a very simple scheme: it copied the hunks, substituted newlines for each white space character, called libxdiff with the processed text, and then identified the text to output by the offsets (which agreed since the original text had the same length). This code was ugly, for a number of reasons: - it was impossible to introduce 0-character word boundaries, - we had to print everything word by word, and - the code needed extra special handling of newlines in the removed part. Fix all of these issues by processing the text such that - we build word lists, separated by newlines, - we remember the original offsets for every word, and - after calling libxdiff on the wordlists, we parse the hunk headers, and find the corresponding offsets, and then - we print the removed/added parts in one go. The pre and post samples in the test were provided by Santi Béjar. Note that there is some strange special handling of hunk headers where one line range is 0 due to POSIX: in this case, the start is one too low. In other words a hunk header '@@ -1,0 +2 @@' actually means that the line must be added after the _second_ line of the pre text, _not_ the first. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-18 00:29:44 +08:00			`#!/bin/sh`

			`test_description='word diff colors'`

			`. ./test-lib.sh`

			`test_expect_success setup '`

			`git config diff.color.old red`
			`git config diff.color.new green`

			`'`

			`decrypt_color () {`
			`sed \`
			`-e 's/.\[1m/<WHITE>/g' \`
			`-e 's/.\[31m/<RED>/g' \`
			`-e 's/.\[32m/<GREEN>/g' \`
			`-e 's/.\[36m/<BROWN>/g' \`
			`-e 's/.\[m/<RESET>/g'`
			`}`

			`word_diff () {`
			`test_must_fail git diff --no-index "$@" pre post > output &&`
			`decrypt_color < output > output.decrypted &&`
			`test_cmp expect output.decrypted`
			`}`

			`cat > pre <<\EOF`
			`h(4)`

			`a = b + c`
			`EOF`

			`cat > post <<\EOF`
			`h(4),hh[44]`

			`a = b + c`

			`aa = a`

			`aeff = aeff * ( aaa )`
			`EOF`

			`cat > expect <<\EOF`
			`<WHITE>diff --git a/pre b/post<RESET>`
			`<WHITE>index 330b04f..5ed8eff 100644<RESET>`
			`<WHITE>--- a/pre<RESET>`
			`<WHITE>+++ b/post<RESET>`
			`<BROWN>@@ -1,3 +1,7 @@<RESET>`
			`<RED>h(4)<RESET><GREEN>h(4),hh[44]<RESET>`
			`<RESET>`
			`a = b + c<RESET>`

			`<GREEN>aa = a<RESET>`

			`<GREEN>aeff = aeff * ( aaa )<RESET>`
			`EOF`

			`test_expect_success 'word diff with runs of whitespace' '`

			`word_diff --color-words`

			`'`

color-words: take an optional regular expression describing words In some applications, words are not delimited by white space. To allow for that, you can specify a regular expression describing what makes a word with git diff --color-words='[A-Za-z0-9]+' Note that words cannot contain newline characters. As suggested by Thomas Rast, the words are the exact matches of the regular expression. Note that a regular expression beginning with a '^' will match only a word at the beginning of the hunk, not a word at the beginning of a line, and is probably not what you want. This commit contains a quoting fix by Thomas Rast. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-18 00:29:45 +08:00			`cat > expect <<\EOF`
			`<WHITE>diff --git a/pre b/post<RESET>`
			`<WHITE>index 330b04f..5ed8eff 100644<RESET>`
			`<WHITE>--- a/pre<RESET>`
			`<WHITE>+++ b/post<RESET>`
			`<BROWN>@@ -1,3 +1,7 @@<RESET>`
			`h(4),<GREEN>hh<RESET>[44]`
			`<RESET>`
			`a = b + c<RESET>`

			`<GREEN>aa = a<RESET>`

			`<GREEN>aeff = aeff * ( aaa<RESET> )`
			`EOF`
color-words: Support diff.wordregex config option When diff is invoked with --color-words (w/o =regex), use the regular expression the user has configured as diff.wordregex. diff drivers configured via attributes take precedence over the diff.wordregex-words setting. If the user wants to change them, they have their own configuration variables. Signed-off-by: Boyd Stephen Smith Jr <bss@iguanasuicide.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-21 11:46:57 +08:00			`cp expect expect.letter-runs-are-words`
color-words: take an optional regular expression describing words In some applications, words are not delimited by white space. To allow for that, you can specify a regular expression describing what makes a word with git diff --color-words='[A-Za-z0-9]+' Note that words cannot contain newline characters. As suggested by Thomas Rast, the words are the exact matches of the regular expression. Note that a regular expression beginning with a '^' will match only a word at the beginning of the hunk, not a word at the beginning of a line, and is probably not what you want. This commit contains a quoting fix by Thomas Rast. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-18 00:29:45 +08:00
			`test_expect_success 'word diff with a regular expression' '`

			`word_diff --color-words="[a-z]+"`

			`'`

color-words: make regex configurable via attributes Make the --color-words splitting regular expression configurable via the diff driver's 'wordregex' attribute. The user can then set the driver on a file in .gitattributes. If a regex is given on the command line, it overrides the driver's setting. We also provide built-in regexes for the languages that already had funcname patterns, and add an appropriate diff driver entry for C/++. (The patterns are designed to run UTF-8 sequences into a single chunk to make sure they remain readable.) Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-18 00:29:48 +08:00			`test_expect_success 'set a diff driver' '`
			`git config diff.testdriver.wordregex "[^[:space:]]" &&`
			`cat <<EOF > .gitattributes`
			`pre diff=testdriver`
			`post diff=testdriver`
			`EOF`
			`'`

color-words: Support diff.wordregex config option When diff is invoked with --color-words (w/o =regex), use the regular expression the user has configured as diff.wordregex. diff drivers configured via attributes take precedence over the diff.wordregex-words setting. If the user wants to change them, they have their own configuration variables. Signed-off-by: Boyd Stephen Smith Jr <bss@iguanasuicide.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-21 11:46:57 +08:00			`test_expect_success 'option overrides .gitattributes' '`
color-words: make regex configurable via attributes Make the --color-words splitting regular expression configurable via the diff driver's 'wordregex' attribute. The user can then set the driver on a file in .gitattributes. If a regex is given on the command line, it overrides the driver's setting. We also provide built-in regexes for the languages that already had funcname patterns, and add an appropriate diff driver entry for C/++. (The patterns are designed to run UTF-8 sequences into a single chunk to make sure they remain readable.) Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-18 00:29:48 +08:00
			`word_diff --color-words="[a-z]+"`

			`'`

			`cat > expect <<\EOF`
			`<WHITE>diff --git a/pre b/post<RESET>`
			`<WHITE>index 330b04f..5ed8eff 100644<RESET>`
			`<WHITE>--- a/pre<RESET>`
			`<WHITE>+++ b/post<RESET>`
			`<BROWN>@@ -1,3 +1,7 @@<RESET>`
			`h(4)<GREEN>,hh[44]<RESET>`
			`<RESET>`
			`a = b + c<RESET>`

			`<GREEN>aa = a<RESET>`

			`<GREEN>aeff = aeff * ( aaa )<RESET>`
			`EOF`
color-words: Support diff.wordregex config option When diff is invoked with --color-words (w/o =regex), use the regular expression the user has configured as diff.wordregex. diff drivers configured via attributes take precedence over the diff.wordregex-words setting. If the user wants to change them, they have their own configuration variables. Signed-off-by: Boyd Stephen Smith Jr <bss@iguanasuicide.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-21 11:46:57 +08:00			`cp expect expect.non-whitespace-is-word`
color-words: make regex configurable via attributes Make the --color-words splitting regular expression configurable via the diff driver's 'wordregex' attribute. The user can then set the driver on a file in .gitattributes. If a regex is given on the command line, it overrides the driver's setting. We also provide built-in regexes for the languages that already had funcname patterns, and add an appropriate diff driver entry for C/++. (The patterns are designed to run UTF-8 sequences into a single chunk to make sure they remain readable.) Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-18 00:29:48 +08:00
color-words: Support diff.wordregex config option When diff is invoked with --color-words (w/o =regex), use the regular expression the user has configured as diff.wordregex. diff drivers configured via attributes take precedence over the diff.wordregex-words setting. If the user wants to change them, they have their own configuration variables. Signed-off-by: Boyd Stephen Smith Jr <bss@iguanasuicide.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-21 11:46:57 +08:00			`test_expect_success 'use regex supplied by driver' '`
color-words: make regex configurable via attributes Make the --color-words splitting regular expression configurable via the diff driver's 'wordregex' attribute. The user can then set the driver on a file in .gitattributes. If a regex is given on the command line, it overrides the driver's setting. We also provide built-in regexes for the languages that already had funcname patterns, and add an appropriate diff driver entry for C/++. (The patterns are designed to run UTF-8 sequences into a single chunk to make sure they remain readable.) Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-18 00:29:48 +08:00
			`word_diff --color-words`

			`'`

color-words: Support diff.wordregex config option When diff is invoked with --color-words (w/o =regex), use the regular expression the user has configured as diff.wordregex. diff drivers configured via attributes take precedence over the diff.wordregex-words setting. If the user wants to change them, they have their own configuration variables. Signed-off-by: Boyd Stephen Smith Jr <bss@iguanasuicide.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-21 11:46:57 +08:00			`test_expect_success 'set diff.wordregex option' '`
			`git config diff.wordregex "[[:alnum:]]+"`
			`'`

			`cp expect.letter-runs-are-words expect`

			`test_expect_success 'command-line overrides config' '`
			`word_diff --color-words="[a-z]+"`
			`'`

			`cp expect.non-whitespace-is-word expect`

			`test_expect_success '.gitattributes override config' '`
			`word_diff --color-words`
			`'`

			`test_expect_success 'remove diff driver regex' '`
			`git config --unset diff.testdriver.wordregex`
			`'`

			`cat > expect <<\EOF`
			`<WHITE>diff --git a/pre b/post<RESET>`
			`<WHITE>index 330b04f..5ed8eff 100644<RESET>`
			`<WHITE>--- a/pre<RESET>`
			`<WHITE>+++ b/post<RESET>`
			`<BROWN>@@ -1,3 +1,7 @@<RESET>`
			`h(4),<GREEN>hh[44<RESET>]`
			`<RESET>`
			`a = b + c<RESET>`

			`<GREEN>aa = a<RESET>`

			`<GREEN>aeff = aeff * ( aaa<RESET> )`
			`EOF`

			`test_expect_success 'use configured regex' '`
			`word_diff --color-words`
			`'`

color-words: take an optional regular expression describing words In some applications, words are not delimited by white space. To allow for that, you can specify a regular expression describing what makes a word with git diff --color-words='[A-Za-z0-9]+' Note that words cannot contain newline characters. As suggested by Thomas Rast, the words are the exact matches of the regular expression. Note that a regular expression beginning with a '^' will match only a word at the beginning of the hunk, not a word at the beginning of a line, and is probably not what you want. This commit contains a quoting fix by Thomas Rast. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-18 00:29:45 +08:00			`echo 'aaa (aaa)' > pre`
			`echo 'aaa (aaa) aaa' > post`

			`cat > expect <<\EOF`
			`<WHITE>diff --git a/pre b/post<RESET>`
			`<WHITE>index c29453b..be22f37 100644<RESET>`
			`<WHITE>--- a/pre<RESET>`
			`<WHITE>+++ b/post<RESET>`
			`<BROWN>@@ -1 +1 @@<RESET>`
			`aaa (aaa) <GREEN>aaa<RESET>`
			`EOF`

			`test_expect_success 'test parsing words for newline' '`

			`word_diff --color-words="a+"`

color-words: make regex configurable via attributes Make the --color-words splitting regular expression configurable via the diff driver's 'wordregex' attribute. The user can then set the driver on a file in .gitattributes. If a regex is given on the command line, it overrides the driver's setting. We also provide built-in regexes for the languages that already had funcname patterns, and add an appropriate diff driver entry for C/++. (The patterns are designed to run UTF-8 sequences into a single chunk to make sure they remain readable.) Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-18 00:29:48 +08:00
color-words: take an optional regular expression describing words In some applications, words are not delimited by white space. To allow for that, you can specify a regular expression describing what makes a word with git diff --color-words='[A-Za-z0-9]+' Note that words cannot contain newline characters. As suggested by Thomas Rast, the words are the exact matches of the regular expression. Note that a regular expression beginning with a '^' will match only a word at the beginning of the hunk, not a word at the beginning of a line, and is probably not what you want. This commit contains a quoting fix by Thomas Rast. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-18 00:29:45 +08:00			`'`

			`echo '(:' > pre`
			`echo '(' > post`

			`cat > expect <<\EOF`
			`<WHITE>diff --git a/pre b/post<RESET>`
			`<WHITE>index 289cb9d..2d06f37 100644<RESET>`
			`<WHITE>--- a/pre<RESET>`
			`<WHITE>+++ b/post<RESET>`
			`<BROWN>@@ -1 +1 @@<RESET>`
			`(<RED>:<RESET>`
			`EOF`

			`test_expect_success 'test when words are only removed at the end' '`

			`word_diff --color-words=.`

			`'`

color-words: change algorithm to allow for 0-character word boundaries Up until now, the color-words code assumed that word boundaries are identical to white space characters. Therefore, it could get away with a very simple scheme: it copied the hunks, substituted newlines for each white space character, called libxdiff with the processed text, and then identified the text to output by the offsets (which agreed since the original text had the same length). This code was ugly, for a number of reasons: - it was impossible to introduce 0-character word boundaries, - we had to print everything word by word, and - the code needed extra special handling of newlines in the removed part. Fix all of these issues by processing the text such that - we build word lists, separated by newlines, - we remember the original offsets for every word, and - after calling libxdiff on the wordlists, we parse the hunk headers, and find the corresponding offsets, and then - we print the removed/added parts in one go. The pre and post samples in the test were provided by Santi Béjar. Note that there is some strange special handling of hunk headers where one line range is 0 due to POSIX: in this case, the start is one too low. In other words a hunk header '@@ -1,0 +2 @@' actually means that the line must be added after the _second_ line of the pre text, _not_ the first. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2009-01-18 00:29:44 +08:00			`test_done`