mirror of
https://github.com/coreutils/coreutils.git
synced 2024-11-24 10:23:31 +08:00
sort: don't dump core when merging from input twice
* NEWS: Document this. * src/sort.c (avoid_trashing_input): The previous fix to this function didn't fix all the problems with this code. Replace it with something simpler: just copy the input file. This doesn't change the number of files, so return void instead of the updated file count. Caller changed. * tests/misc/sort-merge-fdlimit: Test for the bug.
This commit is contained in:
parent
33a5de5a0e
commit
f3c584d1e0
2
NEWS
2
NEWS
@@ -23,6 +23,8 @@ GNU coreutils NEWS -*- outline -*-
|
||||
|
||||
sort --compress no longer mishandles subprocesses' exit statuses.
|
||||
|
||||
sort -m -o f f ... f no longer dumps core when file descriptors are limited.
|
||||
|
||||
** New features
|
||||
|
||||
split accepts the --number option to generate a specific number of files.
|
||||
|
42
src/sort.c
42
src/sort.c
@@ -3549,12 +3549,12 @@ sortlines (struct line *restrict lines, size_t nthreads,
|
||||
pthread_mutex_destroy (&node->lock);
|
||||
}
|
||||
|
||||
/* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
|
||||
the same as OUTFILE. If found, merge the found instances (and perhaps
|
||||
some other files) into a temporary file so that it can in turn be
|
||||
merged into OUTFILE without destroying OUTFILE before it is completely
|
||||
read. Return the new value of NFILES, which differs from the old if
|
||||
some merging occurred.
|
||||
/* Scan through FILES[NTEMPS .. NFILES-1] looking for files that are
|
||||
the same as OUTFILE. If found, replace each with the same
|
||||
temporary copy that can be merged into OUTFILE without destroying
|
||||
OUTFILE before it is completely read. This temporary copy does not
|
||||
count as a merge temp, so don't worry about incrementing NTEMPS in
|
||||
the caller; final cleanup will remove it, not zaptemp.
|
||||
|
||||
This test ensures that an otherwise-erroneous use like
|
||||
"sort -m -o FILE ... FILE ..." copies FILE before writing to it.
|
||||
@@ -3566,13 +3566,15 @@ sortlines (struct line *restrict lines, size_t nthreads,
|
||||
Catching these obscure cases would slow down performance in
|
||||
common cases. */
|
||||
|
||||
static size_t
|
||||
static void
|
||||
avoid_trashing_input (struct sortfile *files, size_t ntemps,
|
||||
size_t nfiles, char const *outfile)
|
||||
{
|
||||
size_t i;
|
||||
bool got_outstat = false;
|
||||
struct stat outstat;
|
||||
char const *tempcopy = NULL;
|
||||
pid_t pid IF_LINT (= 0);
|
||||
|
||||
for (i = ntemps; i < nfiles; i++)
|
||||
{
|
||||
@@ -3603,27 +3605,17 @@ avoid_trashing_input (struct sortfile *files, size_t ntemps,
|
||||
|
||||
if (same)
|
||||
{
|
||||
FILE *tftp;
|
||||
pid_t pid;
|
||||
char *temp = create_temp (&tftp, &pid);
|
||||
size_t num_merged = 0;
|
||||
do
|
||||
if (! tempcopy)
|
||||
{
|
||||
num_merged += mergefiles (&files[i], 0, nfiles - i, tftp, temp);
|
||||
files[i].name = temp;
|
||||
files[i].pid = pid;
|
||||
|
||||
memmove (&files[i + 1], &files[i + num_merged],
|
||||
(nfiles - (i + num_merged)) * sizeof *files);
|
||||
ntemps += 1;
|
||||
nfiles -= num_merged - 1;;
|
||||
i += num_merged;
|
||||
FILE *tftp;
|
||||
tempcopy = create_temp (&tftp, &pid);
|
||||
mergefiles (&files[i], 0, 1, tftp, tempcopy);
|
||||
}
|
||||
while (i < nfiles);
|
||||
|
||||
files[i].name = tempcopy;
|
||||
files[i].pid = pid;
|
||||
}
|
||||
}
|
||||
|
||||
return nfiles;
|
||||
}
|
||||
|
||||
/* Merge the input FILES. NTEMPS is the number of files at the
|
||||
@@ -3693,7 +3685,7 @@ merge (struct sortfile *files, size_t ntemps, size_t nfiles,
|
||||
nfiles -= in - out;
|
||||
}
|
||||
|
||||
nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
|
||||
avoid_trashing_input (files, ntemps, nfiles, output_file);
|
||||
|
||||
/* We aren't guaranteed that this final mergefiles will work, therefore we
|
||||
try to merge into the output, and then merge as much as we can into a
|
||||
|
@@ -50,4 +50,24 @@ for randsource in '' --random-source=some-data; do
|
||||
|| ! grep "open failed" err/merge-random-err) || fail=1
|
||||
done
|
||||
|
||||
# 'sort -m' should work in a limited file descriptor
|
||||
# environment when the output is repeatedly one of its inputs.
|
||||
# In coreutils 8.7 and earlier, 'sort' would dump core on this test.
|
||||
#
|
||||
# This test uses 'exec' to redirect file descriptors rather than
|
||||
# ordinary redirection on the 'sort' command. This is intended to
|
||||
# work around bugs in OpenBSD /bin/sh, and some other sh variants,
|
||||
# that squirrel away file descriptors before closing them; see
|
||||
# <http://lists.gnu.org/archive/html/bug-tar/2010-10/msg00075.html>.
|
||||
# This test finds the bug only with shells that do not close FDs on
|
||||
# exec, and will miss the bug (if present) on other shells, but it's
|
||||
# not easy to fix this without running afoul of the OpenBSD-like sh bugs.
|
||||
(seq 6 && echo 6) >exp || fail=1
|
||||
echo 6 >out || fail=1
|
||||
(exec 3<&- 4<&- 5<&- 6</dev/null 7<&6 8<&6 9<&6 &&
|
||||
ulimit -n 10 &&
|
||||
sort -n -m --batch-size=7 -o out out in/1 in/2 in/3 in/4 in/5 out
|
||||
) &&
|
||||
compare out exp || fail=1
|
||||
|
||||
Exit $fail
|
||||
|
Loading…
Reference in New Issue
Block a user