mirror of
https://github.com/coreutils/coreutils.git
synced 2024-11-24 10:23:31 +08:00
wc: read and process --files0-from= input a name at a time,
unless the file name list is known to be small. Before, wc would always read the entire file name list into memory and *then* process each file name. wc still reads the list into memory when the list is known not to be too large; this is done in order to be able to align the output numbers, as it does with arguments specified on the command line. * src/wc.c: Include "argv-iter.h". (main): Rewrite to use argv-iter, reading one name at a time when the input file name list is not known to be small. * NEWS (Bug fixes): Mention it.
This commit is contained in:
parent
031e2fb5e9
commit
c2e56e0de7
4
NEWS
4
NEWS
@ -20,6 +20,10 @@ GNU coreutils NEWS -*- outline -*-
|
||||
Even with this patch, on some systems, it still produces invalid output,
|
||||
from 3 to at least 1026 lines long. [bug introduced in coreutils-6.11]
|
||||
|
||||
wc --files0-from=FILE no longer reads all of FILE into RAM before
|
||||
processing the first file name, unless the list of names is known
|
||||
to be small enough.
|
||||
|
||||
** Changes in behavior
|
||||
|
||||
cp and mv: the --reply={yes,no,query} option has been removed.
|
||||
|
147
src/wc.c
147
src/wc.c
@ -20,14 +20,17 @@
|
||||
#include <config.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <getopt.h>
|
||||
#include <sys/types.h>
|
||||
#include <wchar.h>
|
||||
#include <wctype.h>
|
||||
|
||||
#include "system.h"
|
||||
#include "argv-iter.h"
|
||||
#include "error.h"
|
||||
#include "mbchar.h"
|
||||
#include "physmem.h"
|
||||
#include "quote.h"
|
||||
#include "quotearg.h"
|
||||
#include "readtokens0.h"
|
||||
@ -515,17 +518,19 @@ wc_file (char const *file, struct fstatus *fstatus)
|
||||
/* Return the file status for the NFILES files addressed by FILE.
|
||||
Optimize the case where only one number is printed, for just one
|
||||
file; in that case we can use a print width of 1, so we don't need
|
||||
to stat the file. */
|
||||
to stat the file. Handle the case of (nfiles == 0) in the same way;
|
||||
that happens when we don't know how long the list of file names will be. */
|
||||
|
||||
static struct fstatus *
|
||||
get_input_fstatus (int nfiles, char * const *file)
|
||||
get_input_fstatus (int nfiles, char *const *file)
|
||||
{
|
||||
struct fstatus *fstatus = xnmalloc (nfiles, sizeof *fstatus);
|
||||
struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
|
||||
|
||||
if (nfiles == 1
|
||||
&& ((print_lines + print_words + print_chars
|
||||
+ print_bytes + print_linelength)
|
||||
== 1))
|
||||
if (nfiles == 0
|
||||
|| (nfiles == 1
|
||||
&& ((print_lines + print_words + print_chars
|
||||
+ print_bytes + print_linelength)
|
||||
== 1)))
|
||||
fstatus[0].failed = 1;
|
||||
else
|
||||
{
|
||||
@ -577,7 +582,6 @@ compute_number_width (int nfiles, struct fstatus const *fstatus)
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
bool ok;
|
||||
int optc;
|
||||
int nfiles;
|
||||
@ -637,6 +641,8 @@ main (int argc, char **argv)
|
||||
| print_linelength))
|
||||
print_lines = print_words = print_bytes = true;
|
||||
|
||||
bool read_tokens = false;
|
||||
struct argv_iterator *ai;
|
||||
if (files_from)
|
||||
{
|
||||
FILE *stream;
|
||||
@ -661,69 +667,114 @@ main (int argc, char **argv)
|
||||
quote (files_from));
|
||||
}
|
||||
|
||||
readtokens0_init (&tok);
|
||||
|
||||
if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
|
||||
error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
|
||||
quote (files_from));
|
||||
|
||||
files = tok.tok;
|
||||
nfiles = tok.n_tok;
|
||||
/* Read the file list into RAM if we can detect its size and that
|
||||
size is reasonable. Otherwise, we'll read a name at a time. */
|
||||
struct stat st;
|
||||
if (fstat (fileno (stream), &st) == 0
|
||||
&& S_ISREG (st.st_mode)
|
||||
&& st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
|
||||
{
|
||||
read_tokens = true;
|
||||
readtokens0_init (&tok);
|
||||
if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
|
||||
error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
|
||||
quote (files_from));
|
||||
files = tok.tok;
|
||||
nfiles = tok.n_tok;
|
||||
ai = argv_iter_init_argv (files);
|
||||
}
|
||||
else
|
||||
{
|
||||
files = NULL;
|
||||
nfiles = 0;
|
||||
ai = argv_iter_init_stream (stream);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
static char *stdin_only[2];
|
||||
static char *stdin_only[] = { NULL };
|
||||
files = (optind < argc ? argv + optind : stdin_only);
|
||||
nfiles = (optind < argc ? argc - optind : 1);
|
||||
stdin_only[0] = NULL;
|
||||
ai = argv_iter_init_argv (files);
|
||||
}
|
||||
|
||||
fstatus = get_input_fstatus (nfiles, files);
|
||||
number_width = compute_number_width (nfiles, fstatus);
|
||||
|
||||
int i;
|
||||
ok = true;
|
||||
for (i = 0; i < nfiles; i++)
|
||||
for (i = 0; /* */; i++)
|
||||
{
|
||||
if (files[i])
|
||||
bool skip_file = false;
|
||||
enum argv_iter_err ai_err;
|
||||
char *file_name = argv_iter (ai, &ai_err);
|
||||
if (ai_err == AI_ERR_EOF)
|
||||
break;
|
||||
if (!file_name)
|
||||
{
|
||||
if (files_from && STREQ (files_from, "-") && STREQ (files[i], "-"))
|
||||
switch (ai_err)
|
||||
{
|
||||
ok = false;
|
||||
/* Give a better diagnostic in an unusual case:
|
||||
printf - | wc --files0-from=- */
|
||||
error (0, 0, _("when reading file names from stdin, "
|
||||
"no file name of %s allowed"),
|
||||
quote ("-"));
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Diagnose a zero-length file name. When it's one
|
||||
among many, knowing the record number may help. */
|
||||
if (files[i][0] == '\0')
|
||||
{
|
||||
ok = false;
|
||||
if (files_from)
|
||||
{
|
||||
/* Using the standard `filename:line-number:' prefix here is
|
||||
not totally appropriate, since NUL is the separator, not NL,
|
||||
but it might be better than nothing. */
|
||||
unsigned long int file_number = i + 1;
|
||||
error (0, 0, "%s:%lu: %s", quotearg_colon (files_from),
|
||||
file_number, _("invalid zero-length file name"));
|
||||
}
|
||||
else
|
||||
error (0, 0, "%s", _("invalid zero-length file name"));
|
||||
case AI_ERR_READ:
|
||||
error (0, errno, _("%s: read error"), quote (files_from));
|
||||
skip_file = true;
|
||||
continue;
|
||||
case AI_ERR_MEM:
|
||||
xalloc_die ();
|
||||
default:
|
||||
assert (!"unexpected error code from argv_iter");
|
||||
}
|
||||
}
|
||||
if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
|
||||
{
|
||||
/* Give a better diagnostic in an unusual case:
|
||||
printf - | wc --files0-from=- */
|
||||
error (0, 0, _("when reading file names from stdin, "
|
||||
"no file name of %s allowed"),
|
||||
quote (file_name));
|
||||
skip_file = true;
|
||||
}
|
||||
|
||||
ok &= wc_file (files[i], &fstatus[i]);
|
||||
if (!file_name[0])
|
||||
{
|
||||
/* Diagnose a zero-length file name. When it's one
|
||||
among many, knowing the record number may help.
|
||||
FIXME: currently print the record number only with
|
||||
--files0-from=FILE. Maybe do it for argv, too? */
|
||||
if (files_from == NULL)
|
||||
error (0, 0, "%s", _("invalid zero-length file name"));
|
||||
else
|
||||
{
|
||||
/* Using the standard `filename:line-number:' prefix here is
|
||||
not totally appropriate, since NUL is the separator, not NL,
|
||||
but it might be better than nothing. */
|
||||
unsigned long int file_number = argv_iter_n_args (ai);
|
||||
error (0, 0, "%s:%lu: %s", quotearg_colon (files_from),
|
||||
file_number, _("invalid zero-length file name"));
|
||||
}
|
||||
skip_file = true;
|
||||
}
|
||||
|
||||
if (skip_file)
|
||||
ok = false;
|
||||
else
|
||||
ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
|
||||
}
|
||||
|
||||
if (1 < nfiles)
|
||||
/* No arguments on the command line is fine. That means read from stdin.
|
||||
However, no arguments on the --files0-from input stream
|
||||
means don't read anything. */
|
||||
if (ok && !files_from && argv_iter_n_args (ai) == 0)
|
||||
ok &= wc_file (NULL, &fstatus[0]);
|
||||
|
||||
if (read_tokens)
|
||||
readtokens0_free (&tok);
|
||||
|
||||
if (1 < argv_iter_n_args (ai))
|
||||
write_counts (total_lines, total_words, total_chars, total_bytes,
|
||||
max_line_length, _("total"));
|
||||
|
||||
argv_iter_free (ai);
|
||||
|
||||
free (fstatus);
|
||||
|
||||
if (have_read_stdin && close (STDIN_FILENO) != 0)
|
||||
|
Loading…
Reference in New Issue
Block a user