sort: add new option --files0-from=F

* src/sort.c: Support new option.
* tests/misc/sort-files0-from: Test new option.
* tests/misc/Makefile.am: Indicate new test.
* docs/coreutils.texi: Explain new option.
* NEWS: Advertise new option.

Signed-off-by: Bo Borgerson <gigabo@gmail.com>
This commit is contained in:
Bo Borgerson 2008-04-03 18:42:57 -04:00 committed by Jim Meyering
parent 3435bb7f40
commit 322c6f2e5c
5 changed files with 183 additions and 5 deletions

5
NEWS
View File

@ -18,6 +18,11 @@ GNU coreutils NEWS -*- outline -*-
md5sum now accepts the new option, --quiet, to suppress the printing of
'OK' messages. sha1sum, sha224sum, sha384sum, and sha512sum accept it, too.
sort accepts a new option, --files0-from=F, that specifies a file
containing a NUL-separated list of files to sort. This list is used
instead of file names passed on the command line, to avoid problems with
the maximum command-line (argv) length.
** Bug fixes
chcon --verbose now prints a newline after each message

View File

@ -3172,7 +3172,7 @@ Print only the newline counts.
@opindex --max-line-length
Print only the maximum line lengths.
@macro filesZeroFromOption{cmd,withTotalOption}
@macro filesZeroFromOption{cmd,withTotalOption,subListOutput}
@itemx --files0-from=@var{FILE}
@opindex --files0-from=@var{FILE}
@c This is commented out to avoid a texi2dvi failure.
@ -3184,13 +3184,13 @@ This is useful \withTotalOption\
when the list of file names is so long that it may exceed a command line
length limitation.
In such cases, running @command{\cmd\} via @command{xargs} is undesirable
because it splits the list into pieces and makes @command{\cmd\} print a
total for each sublist rather than for the entire list.
because it splits the list into pieces and makes @command{\cmd\} print
\subListOutput\ for each sublist rather than for the entire list.
One way to produce a list of null-byte-terminated file names is with @sc{gnu}
@command{find}, using its @option{-print0} predicate.
Do not specify any @var{FILE} on the command line when using this option.
@end macro
@filesZeroFromOption{wc,}
@filesZeroFromOption{wc,,a total}
For example, to find the length of the longest line in any @file{.c} or
@file{.h} file in the current hierarchy, do this:
@ -3779,6 +3779,8 @@ Terminate with an error if @var{prog} exits with nonzero status.
White space and the backslash character should not appear in
@var{prog}; they are reserved for future use.
@filesZeroFromOption{sort,,sorted output}
@item -k @var{pos1}[,@var{pos2}]
@itemx --key=@var{pos1}[,@var{pos2}]
@opindex -k

View File

@ -36,7 +36,9 @@
#include "physmem.h"
#include "posixver.h"
#include "quote.h"
#include "quotearg.h"
#include "randread.h"
#include "readtokens0.h"
#include "stdio--.h"
#include "stdlib--.h"
#include "strnumcmp.h"
@ -303,8 +305,9 @@ usage (int status)
{
printf (_("\
Usage: %s [OPTION]... [FILE]...\n\
or: %s [OPTION]... --files0-from=F\n\
"),
program_name);
program_name, program_name);
fputs (_("\
Write sorted concatenation of all FILE(s) to standard output.\n\
\n\
@ -341,6 +344,10 @@ Other options:\n\
-C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
--compress-program=PROG compress temporaries with PROG;\n\
decompress them with PROG -d\n\
--files0-from=F read input from the files specified by\n\
NUL-terminated names in file F\n\
"), stdout);
fputs (_("\
-k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
-m, --merge merge already sorted files; do not sort\n\
"), stdout);
@ -393,6 +400,7 @@ enum
{
CHECK_OPTION = CHAR_MAX + 1,
COMPRESS_PROGRAM_OPTION,
FILES0_FROM_OPTION,
RANDOM_SOURCE_OPTION,
SORT_OPTION
};
@ -406,6 +414,7 @@ static struct option const long_options[] =
{"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
{"dictionary-order", no_argument, NULL, 'd'},
{"ignore-case", no_argument, NULL, 'f'},
{"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
{"general-numeric-sort", no_argument, NULL, 'g'},
{"ignore-nonprinting", no_argument, NULL, 'i'},
{"key", required_argument, NULL, 'k'},
@ -2751,6 +2760,8 @@ main (int argc, char **argv)
bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
bool obsolete_usage = (posix2_version () < 200112);
char **files;
char *files_from = NULL;
struct Tokens tok;
char const *outfile = NULL;
initialize_main (&argc, &argv);
@ -2954,6 +2965,10 @@ main (int argc, char **argv)
compress_program = optarg;
break;
case FILES0_FROM_OPTION:
files_from = optarg;
break;
case 'k':
key = key_init (&key_buf);
@ -3098,6 +3113,65 @@ main (int argc, char **argv)
}
}
if (files_from)
{
FILE *stream;
/* When using --files0-from=F, you may not specify any files
on the command-line. */
if (nfiles)
{
error (0, 0, _("extra operand %s"), quote (files[0]));
fprintf (stderr, "%s\n",
_("file operands cannot be combined with --files0-from"));
usage (SORT_FAILURE);
}
if (STREQ (files_from, "-"))
stream = stdin;
else
{
stream = fopen (files_from, "r");
if (stream == NULL)
error (SORT_FAILURE, errno, _("cannot open %s for reading"),
quote (files_from));
}
readtokens0_init (&tok);
if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
error (SORT_FAILURE, 0, _("cannot read file names from %s"),
quote (files_from));
if (tok.n_tok)
{
size_t i;
free (files);
files = tok.tok;
nfiles = tok.n_tok;
for (i = 0; i < nfiles; i++)
{
if (STREQ (files[i], "-"))
error (SORT_FAILURE, 0, _("when reading file names from stdin, "
"no file name of %s allowed"),
quote (files[i]));
else if (files[i][0] == '\0')
{
/* Using the standard `filename:line-number:' prefix here is
not totally appropriate, since NUL is the separator, not NL,
but it might be better than nothing. */
unsigned long int file_number = i + 1;
error (SORT_FAILURE, 0,
_("%s:%lu: invalid zero-length file name"),
quotearg_colon (files_from), file_number);
}
}
}
else
error (SORT_FAILURE, 0, _("no input from %s"),
quote (files_from));
}
/* Inheritance of global options to individual keys. */
for (key = keylist; key; key = key->next)
{

View File

@ -193,6 +193,7 @@ TESTS = \
misc/shuf \
misc/sort \
misc/sort-compress \
misc/sort-files0-from \
misc/sort-merge \
misc/sort-rand \
misc/split-a \

96
tests/misc/sort-files0-from Executable file
View File

@ -0,0 +1,96 @@
#!/usr/bin/perl
# Exercise sort's --files0-from option.
# FIXME: keep this file in sync with tests/du/files0-from.
# Copyright (C) 2006-2008 Free Software Foundation, Inc.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
use strict;
# Enable warnings lexically so that they do not depend on the caller
# remembering to run this script with perl's -w switch.
use warnings;

# Base name of this script; handed to run_tests below.
(my $program_name = $0) =~ s|.*/||;

# The program under test.
my $prog = 'sort';

# Turn off localization of executable's output.
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;

# Table of test cases.  Each entry is an array ref whose first element is
# the test name, followed by command-line arguments and hash refs such as
# {IN=>...}, {AUX=>...}, {OUT=>...}, {ERR=>...} and {EXIT=>...}.
# NOTE(review): the exact meaning of those keys is defined by run_tests,
# which is not part of this file -- confirm against the test harness.
my @Tests =
  (
   # invalid extra command line argument
   ['f-extra-arg', '--files0-from=- no-such', {IN=>"a"}, {EXIT=>2},
    {ERR => "$prog: extra operand `no-such'\n"
       . "file operands cannot be combined with --files0-from\n"
       . "Try `$prog --help' for more information.\n"}
    ],

   # missing input file
   ['missing', '--files0-from=missing', {EXIT=>2},
    {ERR => "$prog: cannot open `missing' for reading: "
     . "No such file or directory\n"}],

   # input file name of '-'
   ['minus-in-stdin', '--files0-from=-', '<', {IN=>{f=>'-'}}, {EXIT=>2},
    {ERR => "$prog: when reading file names from stdin, no file name of"
     . " `-' allowed\n"}],

   # empty input, regular file
   # The file name is stripped from the diagnostic via ERR_SUBST before
   # comparison, so only the "$prog: " prefix is expected.
   ['empty', '--files0-from=@AUX@', {AUX=>''}, {EXIT=>2},
    {ERR_SUBST => 's/no input from.+$//'}, {ERR => "$prog: \n"}],

   # empty input, from non-regular file
   ['empty-nonreg', '--files0-from=/dev/null', {EXIT=>2},
    {ERR => "$prog: no input from `/dev/null'\n"}],

   # one NUL
   ['nul-1', '--files0-from=-', '<', {IN=>"\0"}, {EXIT=>2},
    {ERR => "$prog: -:1: invalid zero-length file name\n"}],

   # two NULs
   # Note that the behavior here differs from `wc' in that the
   # first zero-length file name is treated as fatal, so there
   # is only one line of diagnostic output.
   ['nul-2', '--files0-from=-', '<', {IN=>"\0\0"}, {EXIT=>2},
    {ERR => "$prog: -:1: invalid zero-length file name\n"}],

   # one file name, no NUL
   ['1', '--files0-from=-', '<',
    {IN=>{f=>"g"}}, {AUX=>{g=>'a'}}, {OUT=>"a\n"} ],

   # one file name, with NUL
   ['1a', '--files0-from=-', '<',
    {IN=>{f=>"g\0"}}, {AUX=>{g=>'a'}}, {OUT=>"a\n"} ],

   # two file names, no final NUL
   ['2', '--files0-from=-', '<',
    {IN=>{f=>"g\0g"}}, {AUX=>{g=>'a'}}, {OUT=>"a\na\n"} ],

   # two file names, with final NUL
   ['2a', '--files0-from=-', '<',
    {IN=>{f=>"g\0g\0"}}, {AUX=>{g=>'a'}}, {OUT=>"a\na\n"} ],

   # Ensure that $prog performs no processing when there is a zero-length
   # file name.
   # Note that the behavior here differs from `wc' in that the
   # first zero-length file name is treated as fatal, so there
   # should be no output on STDOUT.
   ['zero-len', '--files0-from=-', '<',
    {IN=>{f=>"\0g\0"}}, {AUX=>{g=>''}},
    {ERR => "$prog: -:1: invalid zero-length file name\n"}, {EXIT=>2} ],
  );

# Optional knobs taken from the environment and passed through to run_tests.
my $save_temps = $ENV{DEBUG};
my $verbose = $ENV{VERBOSE};

# run_tests is not defined in this file; presumably the test harness loads
# it before this script runs -- TODO confirm.  Its return value becomes
# this script's exit status.
my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
exit $fail;