mirror of
https://github.com/coreutils/coreutils.git
synced 2024-11-24 10:23:31 +08:00
cut: restore special case handling of -f with -d$'\n'
Commits v8.20-98-g51ce0bf and v8.20-99-gd302aed changed cut(1) to process each line independently and thus promptly output each line without buffering. As part of those changes we removed the special handling of --delimiter=$'\n' --fields=..., which could be used to select arbitrary (ranges of) lines, so as to simplify and optimize the implementation while also matching the behavior of other cut(1) implementations.

However, that GNU behavior was in place for a long time, and can be useful in certain cases, such as building a separated list with `seq 10 | cut -f1- -d$'\n' --output-delimiter=,`, although other tools like head(1) and paste(1) are better suited to this operation. This patch reinstates that functionality but restricts the "line behind" buffering behavior to only the -d$'\n' case.

We also fix the following related edge case to be more consistent:

  before> printf "\n" | cut -s -d$'\n' -f1- | wc -l
  2
  before> printf "\n" | cut -d$'\n' -f1- | wc -l
  1

  after > printf "\n" | cut -s -d$'\n' -f1- | wc -l
  1
  after > printf "\n" | cut -d$'\n' -f1- | wc -l
  1

* src/cut.c (cut_fields): Adjust as discussed above.
* tests/misc/cut.pl: Likewise.
* NEWS: Mention the change in behavior both for v8.21 and this effective revert.
* cfg.mk (old_NEWS_hash): Adjust for originally omitted v8.21 entry.
* src/paste.c: s/delimeter/delimiter/ comment typo fix.
This commit is contained in:
parent
39d1c9576a
commit
5c6cf94ba5
7
NEWS
7
NEWS
@ -90,6 +90,10 @@ GNU coreutils NEWS -*- outline -*-
|
||||
chroot --userspec will now unset supplemental groups associated with root,
|
||||
and instead use the supplemental groups of the specified user.
|
||||
|
||||
cut -d$'\n' again outputs lines identified in the --fields list, having
|
||||
not done so in v8.21 and v8.22. Note that using this non-portable functionality
|
||||
will result in the delayed output of lines.
|
||||
|
||||
ls with none of LS_COLORS or COLORTERM environment variables set,
|
||||
will now honor an empty or unknown TERM environment variable,
|
||||
and not output colors even with --colors=always.
|
||||
@ -343,6 +347,9 @@ GNU coreutils NEWS -*- outline -*-
|
||||
the system by skipping duplicate entries (identified by the device number).
|
||||
Consequently, df also elides the early-boot pseudo file system type "rootfs".
|
||||
|
||||
cut -d$'\n' no longer outputs lines identified in the --fields list,
|
||||
to align with other implementations and to avoid delayed output of lines.
|
||||
|
||||
nl no longer supports the --page-increment option, which has been
|
||||
deprecated since coreutils-7.5. Use --line-increment instead.
|
||||
|
||||
|
2
cfg.mk
2
cfg.mk
@ -45,7 +45,7 @@ export VERBOSE = yes
|
||||
# 4914152 9e
|
||||
export XZ_OPT = -8e
|
||||
|
||||
old_NEWS_hash = 68fc9b352e924d5e59e2f543f80f6a41
|
||||
old_NEWS_hash = adf13e9314300d0dff82fa37b247d7db
|
||||
|
||||
# Add an exemption for sc_makefile_at_at_check.
|
||||
_makefile_at_at_check_exceptions = ' && !/^cu_install_program =/'
|
||||
|
46
src/cut.c
46
src/cut.c
@ -109,13 +109,13 @@ enum operating_mode
|
||||
/* Output characters that are in the given bytes. */
|
||||
byte_mode,
|
||||
|
||||
/* Output the given delimeter-separated fields. */
|
||||
/* Output the given delimiter-separated fields. */
|
||||
field_mode
|
||||
};
|
||||
|
||||
static enum operating_mode operating_mode;
|
||||
|
||||
/* If true do not output lines containing no delimeter characters.
|
||||
/* If true do not output lines containing no delimiter characters.
|
||||
Otherwise, all such lines are printed. This option is valid only
|
||||
with field mode. */
|
||||
static bool suppress_non_delimited;
|
||||
@ -124,7 +124,7 @@ static bool suppress_non_delimited;
|
||||
those that were specified. */
|
||||
static bool complement;
|
||||
|
||||
/* The delimeter character for field mode. */
|
||||
/* The delimiter character for field mode. */
|
||||
static unsigned char delim;
|
||||
|
||||
/* True if the --output-delimiter=STRING option was specified. */
|
||||
@ -538,7 +538,6 @@ cut_fields (FILE *stream)
|
||||
{
|
||||
ssize_t len;
|
||||
size_t n_bytes;
|
||||
bool got_line;
|
||||
|
||||
len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
|
||||
GETNLINE_NO_LIMIT, delim, '\n', stream);
|
||||
@ -555,14 +554,13 @@ cut_fields (FILE *stream)
|
||||
assert (n_bytes != 0);
|
||||
|
||||
c = 0;
|
||||
got_line = field_1_buffer[n_bytes - 1] == '\n';
|
||||
|
||||
/* If the first field extends to the end of line (it is not
|
||||
delimited) and we are printing all non-delimited lines,
|
||||
print this one. */
|
||||
if (to_uchar (field_1_buffer[n_bytes - 1]) != delim || got_line)
|
||||
if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
|
||||
{
|
||||
if (suppress_non_delimited && !(got_line && delim == '\n'))
|
||||
if (suppress_non_delimited)
|
||||
{
|
||||
/* Empty. */
|
||||
}
|
||||
@ -570,7 +568,7 @@ cut_fields (FILE *stream)
|
||||
{
|
||||
fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
|
||||
/* Make sure the output line is newline terminated. */
|
||||
if (! got_line)
|
||||
if (field_1_buffer[n_bytes - 1] != '\n')
|
||||
putchar ('\n');
|
||||
c = '\n';
|
||||
}
|
||||
@ -580,7 +578,19 @@ cut_fields (FILE *stream)
|
||||
{
|
||||
/* Print the field, but not the trailing delimiter. */
|
||||
fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
|
||||
found_any_selected_field = true;
|
||||
|
||||
/* With -d$'\n' don't treat the last '\n' as a delimiter. */
|
||||
if (delim == '\n')
|
||||
{
|
||||
int last_c = getc (stream);
|
||||
if (last_c != EOF)
|
||||
{
|
||||
ungetc (last_c, stream);
|
||||
found_any_selected_field = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
found_any_selected_field = true;
|
||||
}
|
||||
next_item (&field_idx);
|
||||
}
|
||||
@ -610,12 +620,24 @@ cut_fields (FILE *stream)
|
||||
}
|
||||
}
|
||||
|
||||
if (c == '\n' || c == EOF)
|
||||
/* With -d$'\n' don't treat the last '\n' as a delimiter. */
|
||||
if (delim == '\n' && c == delim)
|
||||
{
|
||||
int last_c = getc (stream);
|
||||
if (last_c != EOF)
|
||||
ungetc (last_c, stream);
|
||||
else
|
||||
c = last_c;
|
||||
}
|
||||
|
||||
if (c == delim)
|
||||
next_item (&field_idx);
|
||||
else if (c == '\n' || c == EOF)
|
||||
{
|
||||
if (found_any_selected_field
|
||||
|| !(suppress_non_delimited && field_idx == 1))
|
||||
{
|
||||
if (c == '\n' || prev_c != '\n')
|
||||
if (c == '\n' || prev_c != '\n' || delim == '\n')
|
||||
putchar ('\n');
|
||||
}
|
||||
if (c == EOF)
|
||||
@ -624,8 +646,6 @@ cut_fields (FILE *stream)
|
||||
current_rp = rp;
|
||||
found_any_selected_field = false;
|
||||
}
|
||||
else if (c == delim)
|
||||
next_item (&field_idx);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -62,7 +62,7 @@ static bool have_read_stdin;
|
||||
corresponding lines from each file in parallel. */
|
||||
static bool serial_merge;
|
||||
|
||||
/* The delimeters between lines of input files (used cyclically). */
|
||||
/* The delimiters between lines of input files (used cyclically). */
|
||||
static char *delims;
|
||||
|
||||
/* A pointer to the character after the end of 'delims'. */
|
||||
|
@ -144,15 +144,17 @@ my @Tests =
|
||||
['newline-12', '-s', '-d:', '-f1', {IN=>"a:1\nb:"}, {OUT=>"a\nb\n"}],
|
||||
['newline-13', '-d:', '-f1-', {IN=>"a1:\n:"}, {OUT=>"a1:\n:\n"}],
|
||||
# newline processing for fields when -d == '\n'
|
||||
['newline-14', "-d'\n'", '-f1', {IN=>"a:1\nb:"}, {OUT=>"a:1\nb:\n"}],
|
||||
['newline-14', "-d'\n'", '-f1', {IN=>"a:1\nb:"}, {OUT=>"a:1\n"}],
|
||||
['newline-15', '-s', "-d'\n'", '-f1', {IN=>"a:1\nb:"}, {OUT=>"a:1\n"}],
|
||||
['newline-16', '-s', "-d'\n'", '-f2', {IN=>"\nb"}, {OUT=>""}],
|
||||
['newline-16', '-s', "-d'\n'", '-f2', {IN=>"\nb"}, {OUT=>"b\n"}],
|
||||
['newline-17', '-s', "-d'\n'", '-f1', {IN=>"\nb"}, {OUT=>"\n"}],
|
||||
['newline-18', "-d'\n'", '-f2', {IN=>"\nb"}, {OUT=>"\nb\n"}],
|
||||
['newline-19', "-d'\n'", '-f1', {IN=>"\nb"}, {OUT=>"\nb\n"}],
|
||||
['newline-18', "-d'\n'", '-f2', {IN=>"\nb"}, {OUT=>"b\n"}],
|
||||
['newline-19', "-d'\n'", '-f1', {IN=>"\nb"}, {OUT=>"\n"}],
|
||||
['newline-20', '-s', "-d'\n'", '-f1-', {IN=>"\n"}, {OUT=>"\n"}],
|
||||
['newline-21', '-s', "-d'\n'", '-f1-', {IN=>"\nb"}, {OUT=>"\n"}],
|
||||
['newline-21', '-s', "-d'\n'", '-f1-', {IN=>"\nb"}, {OUT=>"\nb\n"}],
|
||||
['newline-22', "-d'\n'", '-f1-', {IN=>"\nb"}, {OUT=>"\nb\n"}],
|
||||
['newline-23', "-d'\n'", '-f1-', '--ou=:', {IN=>"a\nb\n"}, {OUT=>"a:b\n"}],
|
||||
['newline-24', "-d'\n'", '-f1,2', '--ou=:', {IN=>"a\nb\n"}, {OUT=>"a:b\n"}],
|
||||
|
||||
# New functionality:
|
||||
['out-delim1', '-c1-3,5-', '--output-d=:', {IN=>"abcdefg\n"},
|
||||
|
Loading…
Reference in New Issue
Block a user