mirror of
https://github.com/coreutils/coreutils.git
synced 2024-11-28 04:24:45 +08:00
join: improve memory management
* src/join.c (struct seq): Use a (struct line **) for `lines' rather than one long (struct line *). This allows individual lines to be swapped out if necessary. (reset_line): Get a line ready for new input. (init_linep): Create a new line and assign it to the the pointer passed in. (spareline[2]): Hold a spare line for each input file. (free_spareline): Clean up. (get_line): Take a (struct line **) instead of a (struct line *). If the line to be overwritten is the previous line for the current file then swap it out for the spare. (join): Accomodate new structure of SEQs and new parameters to get_line; Don't free stale lines until the end -- they're re-usable now. (dup_line): Remove function. * NEWS: Mention the performance improvement.
This commit is contained in:
parent
58db1bb942
commit
94cecb5cf6
2
NEWS
2
NEWS
@ -31,6 +31,8 @@ GNU coreutils NEWS -*- outline -*-
|
||||
HP-UX 11, Tru64, AIX, IRIX 6.5, and Cygwin, "ls -l" now displays the presence
|
||||
of an ACL on a file via a '+' sign after the mode, and "cp -p" copies ACLs.
|
||||
|
||||
join has significantly better performance due to better memory management
|
||||
|
||||
od now aligns fields across lines when printing multiple -t
|
||||
specifiers, and no longer prints fields that resulted entirely from
|
||||
padding the input out to the least common multiple width.
|
||||
|
168
src/join.c
168
src/join.c
@ -40,6 +40,12 @@
|
||||
|
||||
#define join system_join
|
||||
|
||||
#define SWAPLINES(a, b) do { \
|
||||
struct line *tmp = a; \
|
||||
a = b; \
|
||||
b = tmp; \
|
||||
} while (0);
|
||||
|
||||
/* An element of the list identifying which fields to print for each
|
||||
output line. */
|
||||
struct outlist
|
||||
@ -76,11 +82,16 @@ struct seq
|
||||
{
|
||||
size_t count; /* Elements used in `lines'. */
|
||||
size_t alloc; /* Elements allocated in `lines'. */
|
||||
struct line *lines;
|
||||
struct line **lines;
|
||||
};
|
||||
|
||||
/* The previous line read from each file. */
|
||||
static struct line *prevline[2];
|
||||
static struct line *prevline[2] = {NULL, NULL};
|
||||
|
||||
/* This provides an extra line buffer for each file. We need these if we
|
||||
try to read two consecutive lines into the same buffer, since we don't
|
||||
want to overwrite the previous buffer before we check order. */
|
||||
static struct line *spareline[2] = {NULL, NULL};
|
||||
|
||||
/* True if the LC_COLLATE locale is hard. */
|
||||
static bool hard_LC_COLLATE;
|
||||
@ -257,33 +268,6 @@ xfields (struct line *line)
|
||||
extract_field (line, ptr, lim - ptr);
|
||||
}
|
||||
|
||||
static struct line *
|
||||
dup_line (const struct line *old)
|
||||
{
|
||||
struct line *newline = xmalloc (sizeof *newline);
|
||||
size_t i;
|
||||
|
||||
/* Duplicate the buffer. */
|
||||
initbuffer (&newline->buf);
|
||||
newline->buf.buffer = xmalloc (old->buf.size);
|
||||
newline->buf.size = old->buf.size;
|
||||
memcpy (newline->buf.buffer, old->buf.buffer, old->buf.length);
|
||||
newline->buf.length = old->buf.length;
|
||||
|
||||
/* Duplicate the field positions. */
|
||||
newline->fields = xnmalloc (old->nfields_allocated, sizeof *newline->fields);
|
||||
newline->nfields = old->nfields;
|
||||
newline->nfields_allocated = old->nfields_allocated;
|
||||
|
||||
for (i = 0; i < old->nfields; i++)
|
||||
{
|
||||
newline->fields[i].len = old->fields[i].len;
|
||||
newline->fields[i].beg = newline->buf.buffer + (old->fields[i].beg
|
||||
- old->buf.buffer);
|
||||
}
|
||||
return newline;
|
||||
}
|
||||
|
||||
static void
|
||||
freeline (struct line *line)
|
||||
{
|
||||
@ -390,49 +374,69 @@ check_order (const struct line *prev,
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
reset_line (struct line *line)
|
||||
{
|
||||
line->nfields = 0;
|
||||
}
|
||||
|
||||
static struct line *
|
||||
init_linep (struct line **linep)
|
||||
{
|
||||
struct line *line = xmalloc (sizeof *line);
|
||||
memset (line, '\0', sizeof *line);
|
||||
*linep = line;
|
||||
return line;
|
||||
}
|
||||
|
||||
/* Read a line from FP into LINE and split it into fields.
|
||||
Return true if successful. */
|
||||
|
||||
static bool
|
||||
get_line (FILE *fp, struct line *line, int which)
|
||||
get_line (FILE *fp, struct line **linep, int which)
|
||||
{
|
||||
initbuffer (&line->buf);
|
||||
struct line *line = *linep;
|
||||
|
||||
if (line == prevline[which - 1])
|
||||
{
|
||||
SWAPLINES (line, spareline[which - 1]);
|
||||
*linep = line;
|
||||
}
|
||||
|
||||
if (line)
|
||||
reset_line (line);
|
||||
else
|
||||
line = init_linep (linep);
|
||||
|
||||
if (! readlinebuffer (&line->buf, fp))
|
||||
{
|
||||
if (ferror (fp))
|
||||
error (EXIT_FAILURE, errno, _("read error"));
|
||||
free (line->buf.buffer);
|
||||
line->buf.buffer = NULL;
|
||||
freeline (line);
|
||||
return false;
|
||||
}
|
||||
|
||||
line->nfields_allocated = 0;
|
||||
line->nfields = 0;
|
||||
line->fields = NULL;
|
||||
xfields (line);
|
||||
|
||||
if (prevline[which - 1])
|
||||
{
|
||||
check_order (prevline[which - 1], line, which);
|
||||
freeline (prevline[which - 1]);
|
||||
free (prevline[which - 1]);
|
||||
}
|
||||
prevline[which - 1] = dup_line (line);
|
||||
check_order (prevline[which - 1], line, which);
|
||||
|
||||
prevline[which - 1] = line;
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
free_prevline (void)
|
||||
free_spareline (void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < ARRAY_CARDINALITY (prevline); i++)
|
||||
for (i = 0; i < ARRAY_CARDINALITY (spareline); i++)
|
||||
{
|
||||
if (prevline[i])
|
||||
freeline (prevline[i]);
|
||||
free (prevline[i]);
|
||||
prevline[i] = NULL;
|
||||
if (spareline[i])
|
||||
{
|
||||
freeline (spareline[i]);
|
||||
free (spareline[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -450,7 +454,12 @@ static bool
|
||||
getseq (FILE *fp, struct seq *seq, int whichfile)
|
||||
{
|
||||
if (seq->count == seq->alloc)
|
||||
seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
|
||||
{
|
||||
size_t i;
|
||||
seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
|
||||
for (i = seq->count; i < seq->alloc; i++)
|
||||
seq->lines[i] = NULL;
|
||||
}
|
||||
|
||||
if (get_line (fp, &seq->lines[seq->count], whichfile))
|
||||
{
|
||||
@ -466,10 +475,8 @@ static bool
|
||||
advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
|
||||
{
|
||||
if (first)
|
||||
{
|
||||
freeline (&seq->lines[0]);
|
||||
seq->count = 0;
|
||||
}
|
||||
seq->count = 0;
|
||||
|
||||
return getseq (fp, seq, whichfile);
|
||||
}
|
||||
|
||||
@ -477,9 +484,13 @@ static void
|
||||
delseq (struct seq *seq)
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; i < seq->count; i++)
|
||||
if (seq->lines[i].buf.buffer)
|
||||
freeline (&seq->lines[i]);
|
||||
for (i = 0; i < seq->alloc; i++)
|
||||
if (seq->lines[i])
|
||||
{
|
||||
if (seq->lines[i]->buf.buffer)
|
||||
freeline (seq->lines[i]);
|
||||
free (seq->lines[i]);
|
||||
}
|
||||
free (seq->lines);
|
||||
}
|
||||
|
||||
@ -592,10 +603,12 @@ static void
|
||||
join (FILE *fp1, FILE *fp2)
|
||||
{
|
||||
struct seq seq1, seq2;
|
||||
struct line line;
|
||||
struct line **linep = xmalloc (sizeof *linep);
|
||||
int diff;
|
||||
bool eof1, eof2, checktail;
|
||||
|
||||
*linep = NULL;
|
||||
|
||||
/* Read the first line of each file. */
|
||||
initseq (&seq1);
|
||||
getseq (fp1, &seq1, 1);
|
||||
@ -605,12 +618,12 @@ join (FILE *fp1, FILE *fp2)
|
||||
while (seq1.count && seq2.count)
|
||||
{
|
||||
size_t i;
|
||||
diff = keycmp (&seq1.lines[0], &seq2.lines[0],
|
||||
diff = keycmp (seq1.lines[0], seq2.lines[0],
|
||||
join_field_1, join_field_2);
|
||||
if (diff < 0)
|
||||
{
|
||||
if (print_unpairables_1)
|
||||
prjoin (&seq1.lines[0], &uni_blank);
|
||||
prjoin (seq1.lines[0], &uni_blank);
|
||||
advance_seq (fp1, &seq1, true, 1);
|
||||
seen_unpairable = true;
|
||||
continue;
|
||||
@ -618,7 +631,7 @@ join (FILE *fp1, FILE *fp2)
|
||||
if (diff > 0)
|
||||
{
|
||||
if (print_unpairables_2)
|
||||
prjoin (&uni_blank, &seq2.lines[0]);
|
||||
prjoin (&uni_blank, seq2.lines[0]);
|
||||
advance_seq (fp2, &seq2, true, 2);
|
||||
seen_unpairable = true;
|
||||
continue;
|
||||
@ -634,7 +647,7 @@ join (FILE *fp1, FILE *fp2)
|
||||
++seq1.count;
|
||||
break;
|
||||
}
|
||||
while (!keycmp (&seq1.lines[seq1.count - 1], &seq2.lines[0],
|
||||
while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0],
|
||||
join_field_1, join_field_2));
|
||||
|
||||
/* Keep reading lines from file2 as long as they continue to
|
||||
@ -647,7 +660,7 @@ join (FILE *fp1, FILE *fp2)
|
||||
++seq2.count;
|
||||
break;
|
||||
}
|
||||
while (!keycmp (&seq1.lines[0], &seq2.lines[seq2.count - 1],
|
||||
while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1],
|
||||
join_field_1, join_field_2));
|
||||
|
||||
if (print_pairables)
|
||||
@ -656,25 +669,21 @@ join (FILE *fp1, FILE *fp2)
|
||||
{
|
||||
size_t j;
|
||||
for (j = 0; j < seq2.count - 1; ++j)
|
||||
prjoin (&seq1.lines[i], &seq2.lines[j]);
|
||||
prjoin (seq1.lines[i], seq2.lines[j]);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < seq1.count - 1; ++i)
|
||||
freeline (&seq1.lines[i]);
|
||||
if (!eof1)
|
||||
{
|
||||
seq1.lines[0] = seq1.lines[seq1.count - 1];
|
||||
SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]);
|
||||
seq1.count = 1;
|
||||
}
|
||||
else
|
||||
seq1.count = 0;
|
||||
|
||||
for (i = 0; i < seq2.count - 1; ++i)
|
||||
freeline (&seq2.lines[i]);
|
||||
if (!eof2)
|
||||
{
|
||||
seq2.lines[0] = seq2.lines[seq2.count - 1];
|
||||
SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]);
|
||||
seq2.count = 1;
|
||||
}
|
||||
else
|
||||
@ -694,14 +703,12 @@ join (FILE *fp1, FILE *fp2)
|
||||
if ((print_unpairables_1 || checktail) && seq1.count)
|
||||
{
|
||||
if (print_unpairables_1)
|
||||
prjoin (&seq1.lines[0], &uni_blank);
|
||||
freeline (&seq1.lines[0]);
|
||||
prjoin (seq1.lines[0], &uni_blank);
|
||||
seen_unpairable = true;
|
||||
while (get_line (fp1, &line, 1))
|
||||
while (get_line (fp1, linep, 1))
|
||||
{
|
||||
if (print_unpairables_1)
|
||||
prjoin (&line, &uni_blank);
|
||||
freeline (&line);
|
||||
prjoin (*linep, &uni_blank);
|
||||
if (issued_disorder_warning[0] && !print_unpairables_1)
|
||||
break;
|
||||
}
|
||||
@ -710,19 +717,20 @@ join (FILE *fp1, FILE *fp2)
|
||||
if ((print_unpairables_2 || checktail) && seq2.count)
|
||||
{
|
||||
if (print_unpairables_2)
|
||||
prjoin (&uni_blank, &seq2.lines[0]);
|
||||
freeline (&seq2.lines[0]);
|
||||
prjoin (&uni_blank, seq2.lines[0]);
|
||||
seen_unpairable = true;
|
||||
while (get_line (fp2, &line, 2))
|
||||
while (get_line (fp2, linep, 2))
|
||||
{
|
||||
if (print_unpairables_2)
|
||||
prjoin (&uni_blank, &line);
|
||||
freeline (&line);
|
||||
prjoin (&uni_blank, *linep);
|
||||
if (issued_disorder_warning[1] && !print_unpairables_2)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
free (*linep);
|
||||
|
||||
free (linep);
|
||||
delseq (&seq1);
|
||||
delseq (&seq2);
|
||||
}
|
||||
@ -938,7 +946,7 @@ main (int argc, char **argv)
|
||||
hard_LC_COLLATE = hard_locale (LC_COLLATE);
|
||||
|
||||
atexit (close_stdout);
|
||||
atexit (free_prevline);
|
||||
atexit (free_spareline);
|
||||
|
||||
print_pairables = true;
|
||||
seen_unpairable = false;
|
||||
|
Loading…
Reference in New Issue
Block a user