sort: don't dump core when merging from input twice

* NEWS: Document this.
* src/sort.c (avoid_trashing_input): The previous fix to this
function didn't fix all the problems with this code.  Replace it
with something simpler: just copy the input file.  This doesn't
change the number of files, so return void instead of the updated
file count.  Caller changed.
* tests/misc/sort-merge-fdlimit: Test for the bug.
Paul Eggert 2010-12-16 00:03:29 -08:00
parent 33a5de5a0e
commit f3c584d1e0
3 changed files with 39 additions and 25 deletions
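
Why the copy is needed: as the updated comment in src/sort.c below puts it, OUTFILE must not be destroyed before it is completely read.  Opening the '-o' target for writing truncates it, so an input that is the same file loses any data that has not yet been read.  The following standalone sketch (a hypothetical demo program, not coreutils code) shows that ordering hazard in miniature:

#include <stdio.h>

int
main (void)
{
  /* Create a small file standing in for one of sort's merge inputs.  */
  FILE *setup = fopen ("f", "w");
  if (! setup)
    return 1;
  fputs ("1\n2\n3\n", setup);
  fclose (setup);

  /* Open it for reading, as a merge would, then open it again for
     writing, as 'sort -o f' would; the "w" open truncates the file.  */
  FILE *in = fopen ("f", "r");
  FILE *out = fopen ("f", "w");

  /* The reader now sees an empty file: the data was destroyed before
     it could be read.  */
  int c = in ? getc (in) : EOF;
  printf ("first byte read after truncation: %d\n", c);

  if (in)
    fclose (in);
  if (out)
    fclose (out);
  return 0;
}

Copying the aliased input to a single temporary file before merging, as this commit does, removes that hazard without changing the number of input files.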

NEWS

@@ -23,6 +23,8 @@ GNU coreutils NEWS                                    -*- outline -*-
 
   sort --compress no longer mishandles subprocesses' exit statuses.
 
+  sort -m -o f f ... f no longer dumps core when file descriptors are limited.
+
 ** New features
 
   split accepts the --number option to generate a specific number of files.

src/sort.c

@@ -3549,12 +3549,12 @@ sortlines (struct line *restrict lines, size_t nthreads,
     pthread_mutex_destroy (&node->lock);
 }
 
-/* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
-   the same as OUTFILE.  If found, merge the found instances (and perhaps
-   some other files) into a temporary file so that it can in turn be
-   merged into OUTFILE without destroying OUTFILE before it is completely
-   read.  Return the new value of NFILES, which differs from the old if
-   some merging occurred.
+/* Scan through FILES[NTEMPS .. NFILES-1] looking for files that are
+   the same as OUTFILE.  If found, replace each with the same
+   temporary copy that can be merged into OUTFILE without destroying
+   OUTFILE before it is completely read.  This temporary copy does not
+   count as a merge temp, so don't worry about incrementing NTEMPS in
+   the caller; final cleanup will remove it, not zaptemp.
 
    This test ensures that an otherwise-erroneous use like
    "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
@@ -3566,13 +3566,15 @@ sortlines (struct line *restrict lines, size_t nthreads,
    Catching these obscure cases would slow down performance in
    common cases.  */
 
-static size_t
+static void
 avoid_trashing_input (struct sortfile *files, size_t ntemps,
                       size_t nfiles, char const *outfile)
 {
   size_t i;
   bool got_outstat = false;
   struct stat outstat;
+  char const *tempcopy = NULL;
+  pid_t pid IF_LINT (= 0);
 
   for (i = ntemps; i < nfiles; i++)
     {
@@ -3603,27 +3605,17 @@ avoid_trashing_input (struct sortfile *files, size_t ntemps,
 
       if (same)
         {
-          FILE *tftp;
-          pid_t pid;
-          char *temp = create_temp (&tftp, &pid);
-          size_t num_merged = 0;
-          do
+          if (! tempcopy)
             {
-              num_merged += mergefiles (&files[i], 0, nfiles - i, tftp, temp);
-              files[i].name = temp;
-              files[i].pid = pid;
-
-              memmove (&files[i + 1], &files[i + num_merged],
-                       (nfiles - (i + num_merged)) * sizeof *files);
-              ntemps += 1;
-              nfiles -= num_merged - 1;;
-              i += num_merged;
+              FILE *tftp;
+              tempcopy = create_temp (&tftp, &pid);
+              mergefiles (&files[i], 0, 1, tftp, tempcopy);
             }
-          while (i < nfiles);
+
+          files[i].name = tempcopy;
+          files[i].pid = pid;
         }
     }
-
-  return nfiles;
 }
 
 /* Merge the input FILES.  NTEMPS is the number of files at the
@@ -3693,7 +3685,7 @@ merge (struct sortfile *files, size_t ntemps, size_t nfiles,
       nfiles -= in - out;
     }
 
-  nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
+  avoid_trashing_input (files, ntemps, nfiles, output_file);
 
   /* We aren't guaranteed that this final mergefiles will work, therefore we
      try to merge into the output, and then merge as much as we can into a
tests/misc/sort-merge-fdlimit

@@ -50,4 +50,24 @@ for randsource in '' --random-source=some-data; do
    || ! grep "open failed" err/merge-random-err) || fail=1
 done
 
+# 'sort -m' should work in a limited file descriptor
+# environment when the output is repeatedly one of its inputs.
+# In coreutils 8.7 and earlier, 'sort' would dump core on this test.
+#
+# This test uses 'exec' to redirect file descriptors rather than
+# ordinary redirection on the 'sort' command.  This is intended to
+# work around bugs in OpenBSD /bin/sh, and some other sh variants,
+# that squirrel away file descriptors before closing them; see
+# <http://lists.gnu.org/archive/html/bug-tar/2010-10/msg00075.html>.
+# This test finds the bug only with shells that do not close FDs on
+# exec, and will miss the bug (if present) on other shells, but it's
+# not easy to fix this without running afoul of the OpenBSD-like sh bugs.
+(seq 6 && echo 6) >exp || fail=1
+echo 6 >out || fail=1
+(exec 3<&- 4<&- 5<&- 6</dev/null 7<&6 8<&6 9<&6 &&
+ ulimit -n 10 &&
+ sort -n -m --batch-size=7 -o out out in/1 in/2 in/3 in/4 in/5 out
+) &&
+compare out exp || fail=1
+
 Exit $fail