coreutils/lib/readtokens.c

206 lines
4.9 KiB
C
Raw Normal View History

1995-01-27 13:37:03 +08:00
/* readtokens.c -- Functions for reading tokens from an input stream.
Copyright (C) 1990-1991, 1999-2004 Free Software Foundation, Inc.
1995-01-27 13:37:03 +08:00
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
1996-07-15 11:43:36 +08:00
along with this program; if not, write to the Free Software Foundation,
2005-05-14 15:58:06 +08:00
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
1995-01-27 13:37:03 +08:00
Written by Jim Meyering. */
/* This almost supersedes xreadline stuff -- using delim="\n"
gives the same functionality, except that these functions
would never return empty lines. */
1995-01-27 13:37:03 +08:00
#ifdef HAVE_CONFIG_H
1996-07-15 11:56:06 +08:00
# include <config.h>
1995-01-27 13:37:03 +08:00
#endif
#include "readtokens.h"
1995-01-27 13:37:03 +08:00
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
1995-01-27 13:37:03 +08:00
#include "xalloc.h"
1995-01-27 13:37:03 +08:00
#if USE_UNLOCKED_IO
# include "unlocked-io.h"
#endif
1995-01-27 13:37:03 +08:00
/* Nonzero if A and B are the same pointer, or are both non-null
   strings with identical contents.  The first bytes are compared
   before calling strcmp so that most mismatches are rejected cheaply.
   Both arguments are evaluated more than once, so they must be free
   of side effects.  */
#define STREQ(a, b) \
  ((a) == (b) \
   || ((a) != NULL && (b) != NULL \
       && *(a) == *(b) \
       && strcmp ((a), (b)) == 0))
/* Put TOKENBUFFER into its empty state: no storage attached and a
   recorded capacity of zero.  Call this once on a token_buffer before
   handing it to readtoken for the first time.  */

void
init_tokenbuffer (token_buffer *tokenbuffer)
{
  tokenbuffer->buffer = NULL;
  tokenbuffer->size = 0;
}
/* Read a token from STREAM into TOKENBUFFER.
A token is delimited by any of the N_DELIM bytes in DELIM.
1995-01-27 13:37:03 +08:00
Upon return, the token is in tokenbuffer->buffer and
has a trailing '\0' instead of any original delimiter.
1995-01-27 13:37:03 +08:00
The function value is the length of the token not including
the final '\0'. Upon EOF (i.e. on the call after the last
token is read) or error, return -1 without modifying tokenbuffer.
The EOF and error conditions may be distinguished in the caller
by testing ferror (STREAM).
1995-01-27 13:37:03 +08:00
This function works properly on lines containing NUL bytes
and on files do not end with a delimiter. */
1995-01-27 13:37:03 +08:00
size_t
1999-04-11 21:25:52 +08:00
readtoken (FILE *stream,
const char *delim,
size_t n_delim,
1999-04-11 21:25:52 +08:00
token_buffer *tokenbuffer)
1995-01-27 13:37:03 +08:00
{
char *p;
int c;
size_t i, n;
1995-01-27 13:37:03 +08:00
static const char *saved_delim = NULL;
static char isdelim[256];
bool same_delimiters;
1995-01-27 13:37:03 +08:00
if (delim == NULL && saved_delim == NULL)
abort ();
same_delimiters = false;
1995-01-27 13:37:03 +08:00
if (delim != saved_delim && saved_delim != NULL)
{
same_delimiters = true;
1995-01-27 13:37:03 +08:00
for (i = 0; i < n_delim; i++)
{
if (delim[i] != saved_delim[i])
{
same_delimiters = false;
1995-01-27 13:37:03 +08:00
break;
}
}
}
if (!same_delimiters)
{
size_t j;
1995-01-27 13:37:03 +08:00
saved_delim = delim;
memset (isdelim, 0, sizeof isdelim);
for (j = 0; j < n_delim; j++)
2004-08-03 06:52:39 +08:00
{
unsigned char ch = delim[j];
isdelim[ch] = 1;
}
1995-01-27 13:37:03 +08:00
}
/* FIXME: don't fool with this caching. Use strchr instead. */
1995-01-27 13:37:03 +08:00
/* skip over any leading delimiters */
for (c = getc (stream); c >= 0 && isdelim[c]; c = getc (stream))
{
/* empty */
}
p = tokenbuffer->buffer;
n = tokenbuffer->size;
i = 0;
1995-01-27 13:37:03 +08:00
for (;;)
{
if (c < 0 && i == 0)
return -1;
if (i == n)
p = x2nrealloc (p, &n, sizeof *p);
1995-01-27 13:37:03 +08:00
if (c < 0)
{
p[i] = 0;
break;
}
if (isdelim[c])
{
p[i] = 0;
break;
}
p[i++] = c;
c = getc (stream);
}
tokenbuffer->buffer = p;
tokenbuffer->size = n;
return i;
1995-01-27 13:37:03 +08:00
}
/* Build a NULL-terminated array of pointers to tokens
read from STREAM. Return the number of tokens read.
All storage is obtained through calls to xmalloc-like functions.
1995-01-27 13:37:03 +08:00
%%% Question: is it worth it to do a single
%%% realloc() of `tokens' just before returning? */
size_t
1999-04-11 21:25:52 +08:00
readtokens (FILE *stream,
size_t projected_n_tokens,
1999-04-11 21:25:52 +08:00
const char *delim,
size_t n_delim,
1999-04-11 21:25:52 +08:00
char ***tokens_out,
size_t **token_lengths)
1995-01-27 13:37:03 +08:00
{
token_buffer tb, *token = &tb;
char **tokens;
size_t *lengths;
size_t sz;
size_t n_tokens;
1995-01-27 13:37:03 +08:00
if (projected_n_tokens == 0)
1995-01-27 13:37:03 +08:00
projected_n_tokens = 64;
else
projected_n_tokens++; /* add one for trailing NULL pointer */
1995-01-27 13:37:03 +08:00
sz = projected_n_tokens;
tokens = xnmalloc (sz, sizeof *tokens);
lengths = xnmalloc (sz, sizeof *lengths);
1995-01-27 13:37:03 +08:00
n_tokens = 0;
1995-01-27 13:37:03 +08:00
init_tokenbuffer (token);
for (;;)
{
char *tmp;
size_t token_length = readtoken (stream, delim, n_delim, token);
1995-01-27 13:37:03 +08:00
if (n_tokens >= sz)
{
tokens = x2nrealloc (tokens, &sz, sizeof *tokens);
lengths = xnrealloc (lengths, sz, sizeof *lengths);
1995-01-27 13:37:03 +08:00
}
if (token_length == (size_t) -1)
1995-01-27 13:37:03 +08:00
{
/* don't increment n_tokens for NULL entry */
tokens[n_tokens] = NULL;
lengths[n_tokens] = 0;
1995-01-27 13:37:03 +08:00
break;
}
tmp = xnmalloc (token_length + 1, sizeof *tmp);
1995-01-27 13:37:03 +08:00
lengths[n_tokens] = token_length;
tokens[n_tokens] = memcpy (tmp, token->buffer, token_length + 1);
1995-01-27 13:37:03 +08:00
n_tokens++;
}
free (token->buffer);
*tokens_out = tokens;
if (token_lengths != NULL)
*token_lengths = lengths;
return n_tokens;
}