cut: restore special case handling of -f with -d$'\n'

Commits v8.20-98-g51ce0bf and v8.20-99-gd302aed changed cut(1)
to process each line independently and thus promptly output
each line without buffering.  As part of those changes we removed
the special handling of --delimiter=$'\n' --fields=..., which
could be used to select arbitrary (ranges of) lines, so as to
simplify and optimize the implementation while also matching the
behavior of other cut(1) implementations.

However, that GNU behavior was in place for a long time, and
could be useful in certain cases, such as producing a delimited list with
`seq 10 | cut -f1- -d$'\n' --output-delimiter=,`, although other tools
like head(1) and paste(1) are more suited to this operation.
This patch reinstates that functionality but restricts the
"line behind" buffering behavior to only the -d$'\n' case.

We also fix the following related edge case to be more consistent:

  before> printf "\n" | cut -s -d$'\n' -f1- | wc -l
  2
  before> printf "\n" | cut    -d$'\n' -f1- | wc -l
  1
  after > printf "\n" | cut -s -d$'\n' -f1- | wc -l
  1
  after > printf "\n" | cut    -d$'\n' -f1- | wc -l
  1

* src/cut.c (cut_fields): Adjust as discussed above.
* tests/misc/cut.pl: Likewise.
* NEWS: Mention the change in behavior both for v8.21
and this effective revert.
* cfg.mk (old_NEWS_hash): Adjust for originally omitted v8.21 entry.
* src/paste.c: s/delimeter/delimiter/ comment typo fix.
This commit is contained in:
Pádraig Brady 2014-05-30 17:44:32 +01:00
parent 39d1c9576a
commit 5c6cf94ba5
5 changed files with 49 additions and 20 deletions

7
NEWS
View File

@ -90,6 +90,10 @@ GNU coreutils NEWS -*- outline -*-
chroot --userspec will now unset supplemental groups associated with root,
and instead use the supplemental groups of the specified user.
cut -d$'\n' again outputs lines identified in the --fields list, having
not done so in v8.21 and v8.22. Note that using this non-portable
functionality will result in the delayed output of lines.
ls with none of LS_COLORS or COLORTERM environment variables set,
will now honor an empty or unknown TERM environment variable,
and not output colors even with --color=always.
@ -343,6 +347,9 @@ GNU coreutils NEWS -*- outline -*-
the system by skipping duplicate entries (identified by the device number).
Consequently, df also elides the early-boot pseudo file system type "rootfs".
cut -d$'\n' no longer outputs lines identified in the --fields list,
to align with other implementations and to avoid delayed output of lines.
nl no longer supports the --page-increment option, which has been
deprecated since coreutils-7.5. Use --line-increment instead.

2
cfg.mk
View File

@ -45,7 +45,7 @@ export VERBOSE = yes
# 4914152 9e
export XZ_OPT = -8e
old_NEWS_hash = 68fc9b352e924d5e59e2f543f80f6a41
old_NEWS_hash = adf13e9314300d0dff82fa37b247d7db
# Add an exemption for sc_makefile_at_at_check.
_makefile_at_at_check_exceptions = ' && !/^cu_install_program =/'

View File

@ -109,13 +109,13 @@ enum operating_mode
/* Output characters that are in the given bytes. */
byte_mode,
/* Output the given delimeter-separated fields. */
/* Output the given delimiter-separated fields. */
field_mode
};
static enum operating_mode operating_mode;
/* If true do not output lines containing no delimeter characters.
/* If true do not output lines containing no delimiter characters.
Otherwise, all such lines are printed. This option is valid only
with field mode. */
static bool suppress_non_delimited;
@ -124,7 +124,7 @@ static bool suppress_non_delimited;
those that were specified. */
static bool complement;
/* The delimeter character for field mode. */
/* The delimiter character for field mode. */
static unsigned char delim;
/* True if the --output-delimiter=STRING option was specified. */
@ -538,7 +538,6 @@ cut_fields (FILE *stream)
{
ssize_t len;
size_t n_bytes;
bool got_line;
len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
GETNLINE_NO_LIMIT, delim, '\n', stream);
@ -555,14 +554,13 @@ cut_fields (FILE *stream)
assert (n_bytes != 0);
c = 0;
got_line = field_1_buffer[n_bytes - 1] == '\n';
/* If the first field extends to the end of line (it is not
delimited) and we are printing all non-delimited lines,
print this one. */
if (to_uchar (field_1_buffer[n_bytes - 1]) != delim || got_line)
if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
{
if (suppress_non_delimited && !(got_line && delim == '\n'))
if (suppress_non_delimited)
{
/* Empty. */
}
@ -570,7 +568,7 @@ cut_fields (FILE *stream)
{
fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
/* Make sure the output line is newline terminated. */
if (! got_line)
if (field_1_buffer[n_bytes - 1] != '\n')
putchar ('\n');
c = '\n';
}
@ -580,7 +578,19 @@ cut_fields (FILE *stream)
{
/* Print the field, but not the trailing delimiter. */
fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
found_any_selected_field = true;
/* With -d$'\n' don't treat the last '\n' as a delimiter. */
if (delim == '\n')
{
int last_c = getc (stream);
if (last_c != EOF)
{
ungetc (last_c, stream);
found_any_selected_field = true;
}
}
else
found_any_selected_field = true;
}
next_item (&field_idx);
}
@ -610,12 +620,24 @@ cut_fields (FILE *stream)
}
}
if (c == '\n' || c == EOF)
/* With -d$'\n' don't treat the last '\n' as a delimiter. */
if (delim == '\n' && c == delim)
{
int last_c = getc (stream);
if (last_c != EOF)
ungetc (last_c, stream);
else
c = last_c;
}
if (c == delim)
next_item (&field_idx);
else if (c == '\n' || c == EOF)
{
if (found_any_selected_field
|| !(suppress_non_delimited && field_idx == 1))
{
if (c == '\n' || prev_c != '\n')
if (c == '\n' || prev_c != '\n' || delim == '\n')
putchar ('\n');
}
if (c == EOF)
@ -624,8 +646,6 @@ cut_fields (FILE *stream)
current_rp = rp;
found_any_selected_field = false;
}
else if (c == delim)
next_item (&field_idx);
}
}

View File

@ -62,7 +62,7 @@ static bool have_read_stdin;
corresponding lines from each file in parallel. */
static bool serial_merge;
/* The delimeters between lines of input files (used cyclically). */
/* The delimiters between lines of input files (used cyclically). */
static char *delims;
/* A pointer to the character after the end of 'delims'. */

View File

@ -144,15 +144,17 @@ my @Tests =
['newline-12', '-s', '-d:', '-f1', {IN=>"a:1\nb:"}, {OUT=>"a\nb\n"}],
['newline-13', '-d:', '-f1-', {IN=>"a1:\n:"}, {OUT=>"a1:\n:\n"}],
# newline processing for fields when -d == '\n'
['newline-14', "-d'\n'", '-f1', {IN=>"a:1\nb:"}, {OUT=>"a:1\nb:\n"}],
['newline-14', "-d'\n'", '-f1', {IN=>"a:1\nb:"}, {OUT=>"a:1\n"}],
['newline-15', '-s', "-d'\n'", '-f1', {IN=>"a:1\nb:"}, {OUT=>"a:1\n"}],
['newline-16', '-s', "-d'\n'", '-f2', {IN=>"\nb"}, {OUT=>""}],
['newline-16', '-s', "-d'\n'", '-f2', {IN=>"\nb"}, {OUT=>"b\n"}],
['newline-17', '-s', "-d'\n'", '-f1', {IN=>"\nb"}, {OUT=>"\n"}],
['newline-18', "-d'\n'", '-f2', {IN=>"\nb"}, {OUT=>"\nb\n"}],
['newline-19', "-d'\n'", '-f1', {IN=>"\nb"}, {OUT=>"\nb\n"}],
['newline-18', "-d'\n'", '-f2', {IN=>"\nb"}, {OUT=>"b\n"}],
['newline-19', "-d'\n'", '-f1', {IN=>"\nb"}, {OUT=>"\n"}],
['newline-20', '-s', "-d'\n'", '-f1-', {IN=>"\n"}, {OUT=>"\n"}],
['newline-21', '-s', "-d'\n'", '-f1-', {IN=>"\nb"}, {OUT=>"\n"}],
['newline-21', '-s', "-d'\n'", '-f1-', {IN=>"\nb"}, {OUT=>"\nb\n"}],
['newline-22', "-d'\n'", '-f1-', {IN=>"\nb"}, {OUT=>"\nb\n"}],
['newline-23', "-d'\n'", '-f1-', '--ou=:', {IN=>"a\nb\n"}, {OUT=>"a:b\n"}],
['newline-24', "-d'\n'", '-f1,2', '--ou=:', {IN=>"a\nb\n"}, {OUT=>"a:b\n"}],
# New functionality:
['out-delim1', '-c1-3,5-', '--output-d=:', {IN=>"abcdefg\n"},