basenc: prefer signed to unsigned integers

This patch modifies basenc to prefer signed integers to
unsigned, as signed are less error-prone.
This patch also updates Gnulib to to latest, which updates Gnulib’s
base32 and base64 modules to prefer signed to unsigned integers.
* src/basenc.c: Include idx.h.
(struct base2_decode_context): Use unsigned char, not unsigned
for an octet that must fit in an unsigned char.
(base_encode, struct base_decode_context)
(base64_decode_ctx_wrapper, prepare_inbuf, base64url_encode)
(base64url_decode_ctx_wrapper, base32_decode_ctx_wrapper)
(base32hex_encode, base32hex_decode_ctx_wrapper, base16_encode)
(base16_decode_ctx, z85_encode, Z85_HI_CTX_TO_32BIT_VAL)
(z85_decoding, z85_decode_ctx, base2msbf_encode)
(base2lsbf_encode, base2lsbf_decode_ctx, base2msbf_decode_ctx)
(wrap_write, do_encode, do_decode, main):
Prefer signed integers to unsigned.
(main): Treat extremely large wrap columns as if they were
infinite; that’s good enough.  Since we’re now using xstrtoimax,
this allows ‘-w -0’ (same as ‘-w 0’).
* tests/misc/base64.pl (gen_tests): -w-0 is no longer an error.
This commit is contained in:
Paul Eggert 2021-08-27 15:29:46 -07:00
parent 88562dc9c7
commit 97d8dcc5ca
3 changed files with 88 additions and 105 deletions

2
gnulib

@ -1 +1 @@
Subproject commit 4ea0e64a8db7064427f6aa5624a4efd4b41db132
Subproject commit 452e8a8f7b07a794b1de3813062a992509f5c0c9

View File

@ -27,6 +27,7 @@
#include "die.h"
#include "error.h"
#include "fadvise.h"
#include "idx.h"
#include "quote.h"
#include "xstrtol.h"
#include "xdectoint.h"
@ -217,8 +218,8 @@ verify (DEC_BLOCKSIZE % 12 == 0); /* So complete encoded blocks are used. */
static int (*base_length) (int i);
static bool (*isbase) (char ch);
static void (*base_encode) (char const *restrict in, size_t inlen,
char *restrict out, size_t outlen);
static void (*base_encode) (char const *restrict in, idx_t inlen,
char *restrict out, idx_t outlen);
struct base16_decode_context
{
@ -234,7 +235,7 @@ struct z85_decode_context
struct base2_decode_context
{
unsigned octet;
unsigned char octet;
};
struct base_decode_context
@ -248,12 +249,12 @@ struct base_decode_context
struct z85_decode_context z85;
} ctx;
char *inbuf;
size_t bufsize;
idx_t bufsize;
};
static void (*base_decode_ctx_init) (struct base_decode_context *ctx);
static bool (*base_decode_ctx) (struct base_decode_context *ctx,
char const *restrict in, size_t inlen,
char *restrict out, size_t *outlen);
char const *restrict in, idx_t inlen,
char *restrict out, idx_t *outlen);
#endif
@ -275,8 +276,8 @@ base64_decode_ctx_init_wrapper (struct base_decode_context *ctx)
static bool
base64_decode_ctx_wrapper (struct base_decode_context *ctx,
char const *restrict in, size_t inlen,
char *restrict out, size_t *outlen)
char const *restrict in, idx_t inlen,
char *restrict out, idx_t *outlen)
{
bool b = base64_decode_ctx (&ctx->ctx.base64, in, inlen, out, outlen);
ctx->i = ctx->ctx.base64.i;
@ -291,7 +292,7 @@ init_inbuf (struct base_decode_context *ctx)
}
static void
prepare_inbuf (struct base_decode_context *ctx, size_t inlen)
prepare_inbuf (struct base_decode_context *ctx, idx_t inlen)
{
if (ctx->bufsize < inlen)
{
@ -302,8 +303,8 @@ prepare_inbuf (struct base_decode_context *ctx, size_t inlen)
static void
base64url_encode (char const *restrict in, size_t inlen,
char *restrict out, size_t outlen)
base64url_encode (char const *restrict in, idx_t inlen,
char *restrict out, idx_t outlen)
{
base64_encode (in, inlen, out, outlen);
/* translate 62nd and 63rd characters */
@ -335,14 +336,14 @@ base64url_decode_ctx_init_wrapper (struct base_decode_context *ctx)
static bool
base64url_decode_ctx_wrapper (struct base_decode_context *ctx,
char const *restrict in, size_t inlen,
char *restrict out, size_t *outlen)
char const *restrict in, idx_t inlen,
char *restrict out, idx_t *outlen)
{
prepare_inbuf (ctx, inlen);
memcpy (ctx->inbuf, in, inlen);
/* translate 62nd and 63rd characters */
size_t i = inlen;
idx_t i = inlen;
char* p = ctx->inbuf;
while (i--)
{
@ -381,8 +382,8 @@ base32_decode_ctx_init_wrapper (struct base_decode_context *ctx)
static bool
base32_decode_ctx_wrapper (struct base_decode_context *ctx,
char const *restrict in, size_t inlen,
char *restrict out, size_t *outlen)
char const *restrict in, idx_t inlen,
char *restrict out, idx_t *outlen)
{
bool b = base32_decode_ctx (&ctx->ctx.base32, in, inlen, out, outlen);
ctx->i = ctx->ctx.base32.i;
@ -439,8 +440,8 @@ isbase32hex (char ch)
static void
base32hex_encode (char const *restrict in, size_t inlen,
char *restrict out, size_t outlen)
base32hex_encode (char const *restrict in, idx_t inlen,
char *restrict out, idx_t outlen)
{
base32_encode (in, inlen, out, outlen);
@ -462,12 +463,12 @@ base32hex_decode_ctx_init_wrapper (struct base_decode_context *ctx)
static bool
base32hex_decode_ctx_wrapper (struct base_decode_context *ctx,
char const *restrict in, size_t inlen,
char *restrict out, size_t *outlen)
char const *restrict in, idx_t inlen,
char *restrict out, idx_t *outlen)
{
prepare_inbuf (ctx, inlen);
size_t i = inlen;
idx_t i = inlen;
char *p = ctx->inbuf;
while (i--)
{
@ -502,8 +503,8 @@ base16_length (int len)
static const char base16[16] = "0123456789ABCDEF";
static void
base16_encode (char const *restrict in, size_t inlen,
char *restrict out, size_t outlen)
base16_encode (char const *restrict in, idx_t inlen,
char *restrict out, idx_t outlen)
{
while (inlen--)
{
@ -526,11 +527,10 @@ base16_decode_ctx_init (struct base_decode_context *ctx)
static bool
base16_decode_ctx (struct base_decode_context *ctx,
char const *restrict in, size_t inlen,
char *restrict out, size_t *outlen)
char const *restrict in, idx_t inlen,
char *restrict out, idx_t *outlen)
{
bool ignore_lines = true; /* for now, always ignore them */
unsigned int nib;
*outlen = 0;
@ -548,15 +548,14 @@ base16_decode_ctx (struct base_decode_context *ctx,
continue;
}
if (*in >= 'A' && *in <= 'F')
nib = (*in-'A'+10);
else if (*in >= '0' && *in <= '9')
nib = (*in-'0');
int nib = *in++;
if ('0' <= nib && nib <= '9')
nib -= '0';
else if ('A' <= nib && nib <= 'F')
nib -= 'A' - 10;
else
return false; /* garbage - return false */
++in;
if (ctx->ctx.base16.have_nibble)
{
/* have both nibbles, write octet */
@ -597,13 +596,12 @@ static char const z85_encoding[85] =
".-:+=^!/*?&<>()[]{}@%$#";
static void
z85_encode (char const *restrict in, size_t inlen,
char *restrict out, size_t outlen)
z85_encode (char const *restrict in, idx_t inlen,
char *restrict out, idx_t outlen)
{
int i = 0;
unsigned char quad[4];
unsigned int val;
size_t outidx = 0;
idx_t outidx = 0;
while (true)
{
@ -626,14 +624,12 @@ z85_encode (char const *restrict in, size_t inlen,
/* Got a quad, encode it */
if (i == 4)
{
val = ((uint32_t) quad[0] << 24)
+ ((uint32_t) quad[1] << 16)
+ ((uint32_t) quad[2] << 8)
+ quad[3];
int_fast64_t val = quad[0];
val = (val << 24) + (quad[1] << 16) + (quad[2] << 8) + quad[3];
for (int j = 4; j>=0; --j)
for (int j = 4; j >= 0; --j)
{
unsigned char c = val%85;
int c = val % 85;
val /= 85;
/* NOTE: if there is padding (which is trimmed by z85
@ -667,7 +663,7 @@ z85_decode_ctx_init (struct base_decode_context *ctx)
# define Z85_HI_CTX_TO_32BIT_VAL(ctx) \
((uint32_t)(ctx)->ctx.z85.octets[0] * 85 * 85 * 85 * 85 )
((int_fast64_t) (ctx)->ctx.z85.octets[0] * 85 * 85 * 85 * 85 )
/*
0 - 9: 0 1 2 3 4 5 6 7 8 9
@ -680,25 +676,25 @@ z85_decode_ctx_init (struct base_decode_context *ctx)
70 - 79: * ? & < > ( ) [ ] {
80 - 84: } @ % $ #
*/
static unsigned char z85_decoding[93] = {
68, 255, 84, 83, 82, 72, 255, /* ! " # $ % & ' */
75, 76, 70, 65, 255, 63, 62, 69, /* ( ) * + , - . / */
static signed char const z85_decoding[93] = {
68, -1, 84, 83, 82, 72, -1, /* ! " # $ % & ' */
75, 76, 70, 65, -1, 63, 62, 69, /* ( ) * + , - . / */
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, /* '0' to '9' */
64, 255, 73, 66, 74, 71, 81, /* : ; < = > ? @ */
64, -1, 73, 66, 74, 71, 81, /* : ; < = > ? @ */
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, /* 'A' to 'J' */
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, /* 'K' to 'T' */
56, 57, 58, 59, 60, 61, /* 'U' to 'Z' */
77, 255, 78, 67, 255, 255, /* [ \ ] ^ _ ` */
77, -1, 78, 67, -1, -1, /* [ \ ] ^ _ ` */
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, /* 'a' to 'j' */
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, /* 'k' to 't' */
30, 31, 32, 33, 34, 35, /* 'u' to 'z' */
79, 255, 80 /* { | } */
79, -1, 80 /* { | } */
};
static bool
z85_decode_ctx (struct base_decode_context *ctx,
char const *restrict in, size_t inlen,
char *restrict out, size_t *outlen)
char const *restrict in, idx_t inlen,
char *restrict out, idx_t *outlen)
{
bool ignore_lines = true; /* for now, always ignore them */
@ -731,9 +727,10 @@ z85_decode_ctx (struct base_decode_context *ctx,
if (c >= 33 && c <= 125)
{
c = z85_decoding[c-33];
if (c == 255)
signed char ch = z85_decoding[c - 33];
if (ch < 0)
return false; /* garbage - return false */
c = ch;
}
else
return false; /* garbage - return false */
@ -744,28 +741,16 @@ z85_decode_ctx (struct base_decode_context *ctx,
if (ctx->ctx.z85.i == 5)
{
/* decode the lowest 4 octets, then check for overflows. */
unsigned int val = Z85_LO_CTX_TO_32BIT_VAL (ctx);
int_fast64_t val = Z85_LO_CTX_TO_32BIT_VAL (ctx);
/* The Z85 spec and the reference implementation say nothing
about overflows. To be on the safe side, reject them.
about overflows. To be on the safe side, reject them. */
'$' (decoded to 83) in the highest octet
would result in value of 83*85^4 = 4332651875 , which is larger
than 2^32-1 and will overflow an unsigned int (similarly
for '$' decoded to 84).
'%' (decoded to 82) in the highest octet can fit in unsigned int
if the other 4 octets decode to a small enough value.
*/
if (ctx->ctx.z85.octets[0] == 84 || ctx->ctx.z85.octets[0] == 83
|| (ctx->ctx.z85.octets[0] == 82
&& val > 0xFFFFFFFF - 82*85*85*85*85U))
val += Z85_HI_CTX_TO_32BIT_VAL (ctx);
if ((val >> 24) & ~0xFF)
return false;
/* no overflow, add the high octet value */
val += Z85_HI_CTX_TO_32BIT_VAL (ctx);
*out++ = (val >> 24) & 0xFF;
*out++ = val >> 24;
*out++ = (val >> 16) & 0xFF;
*out++ = (val >> 8) & 0xFF;
*out++ = val & 0xFF;
@ -794,8 +779,8 @@ base2_length (int len)
inline static void
base2msbf_encode (char const *restrict in, size_t inlen,
char *restrict out, size_t outlen)
base2msbf_encode (char const *restrict in, idx_t inlen,
char *restrict out, idx_t outlen)
{
while (inlen--)
{
@ -811,8 +796,8 @@ base2msbf_encode (char const *restrict in, size_t inlen,
}
inline static void
base2lsbf_encode (char const *restrict in, size_t inlen,
char *restrict out, size_t outlen)
base2lsbf_encode (char const *restrict in, idx_t inlen,
char *restrict out, idx_t outlen)
{
while (inlen--)
{
@ -839,8 +824,8 @@ base2_decode_ctx_init (struct base_decode_context *ctx)
static bool
base2lsbf_decode_ctx (struct base_decode_context *ctx,
char const *restrict in, size_t inlen,
char *restrict out, size_t *outlen)
char const *restrict in, idx_t inlen,
char *restrict out, idx_t *outlen)
{
bool ignore_lines = true; /* for now, always ignore them */
@ -883,8 +868,8 @@ base2lsbf_decode_ctx (struct base_decode_context *ctx,
static bool
base2msbf_decode_ctx (struct base_decode_context *ctx,
char const *restrict in, size_t inlen,
char *restrict out, size_t *outlen)
char const *restrict in, idx_t inlen,
char *restrict out, idx_t *outlen)
{
bool ignore_lines = true; /* for now, always ignore them */
@ -932,11 +917,9 @@ base2msbf_decode_ctx (struct base_decode_context *ctx,
static void
wrap_write (char const *buffer, size_t len,
uintmax_t wrap_column, size_t *current_column, FILE *out)
wrap_write (char const *buffer, idx_t len,
idx_t wrap_column, idx_t *current_column, FILE *out)
{
size_t written;
if (wrap_column == 0)
{
/* Simple write. */
@ -944,11 +927,9 @@ wrap_write (char const *buffer, size_t len,
die (EXIT_FAILURE, errno, _("write error"));
}
else
for (written = 0; written < len;)
for (idx_t written = 0; written < len; )
{
uintmax_t cols_remaining = wrap_column - *current_column;
size_t to_write = MIN (cols_remaining, SIZE_MAX);
to_write = MIN (to_write, len - written);
idx_t to_write = MIN (wrap_column - *current_column, len - written);
if (to_write == 0)
{
@ -967,18 +948,18 @@ wrap_write (char const *buffer, size_t len,
}
static void
do_encode (FILE *in, FILE *out, uintmax_t wrap_column)
do_encode (FILE *in, FILE *out, idx_t wrap_column)
{
size_t current_column = 0;
idx_t current_column = 0;
char *inbuf, *outbuf;
size_t sum;
idx_t sum;
inbuf = xmalloc (ENC_BLOCKSIZE);
outbuf = xmalloc (BASE_LENGTH (ENC_BLOCKSIZE));
do
{
size_t n;
idx_t n;
sum = 0;
do
@ -1015,7 +996,7 @@ static void
do_decode (FILE *in, FILE *out, bool ignore_garbage)
{
char *inbuf, *outbuf;
size_t sum;
idx_t sum;
struct base_decode_context ctx;
inbuf = xmalloc (BASE_LENGTH (DEC_BLOCKSIZE));
@ -1029,17 +1010,16 @@ do_decode (FILE *in, FILE *out, bool ignore_garbage)
do
{
bool ok;
size_t n;
unsigned int k;
sum = 0;
do
{
n = fread (inbuf + sum, 1, BASE_LENGTH (DEC_BLOCKSIZE) - sum, in);
idx_t n = fread (inbuf + sum,
1, BASE_LENGTH (DEC_BLOCKSIZE) - sum, in);
if (ignore_garbage)
{
for (size_t i = 0; n > 0 && i < n;)
for (idx_t i = 0; n > 0 && i < n;)
{
if (isbase (inbuf[sum + i]) || inbuf[sum + i] == '=')
i++;
@ -1059,11 +1039,11 @@ do_decode (FILE *in, FILE *out, bool ignore_garbage)
However, when it processes the final input buffer, we want
to iterate it one additional time, but with an indicator
telling it to flush what is in CTX. */
for (k = 0; k < 1 + !!feof (in); k++)
for (int k = 0; k < 1 + !!feof (in); k++)
{
if (k == 1 && ctx.i == 0)
break;
n = DEC_BLOCKSIZE;
idx_t n = DEC_BLOCKSIZE;
ok = base_decode_ctx (&ctx, inbuf, (k == 0 ? sum : 0), outbuf, &n);
if (fwrite (outbuf, 1, n, out) < n)
@ -1093,8 +1073,8 @@ main (int argc, char **argv)
bool decode = false;
/* True if we should ignore non-base-alphabetic characters. */
bool ignore_garbage = false;
/* Wrap encoded data around the 76:th column, by default. */
uintmax_t wrap_column = 76;
/* Wrap encoded data around the 76th column, by default. */
idx_t wrap_column = 76;
#if BASE_TYPE == 42
int base_type = 0;
@ -1116,8 +1096,14 @@ main (int argc, char **argv)
break;
case 'w':
wrap_column = xdectoumax (optarg, 0, UINTMAX_MAX, "",
_("invalid wrap size"), 0);
{
intmax_t w;
strtol_error s_err = xstrtoimax (optarg, NULL, 10, &w, "");
if (LONGINT_OVERFLOW < s_err || w < 0)
die (EXIT_FAILURE, 0, "%s: %s",
_("invalid wrap size"), quote (optarg));
wrap_column = s_err == LONGINT_OVERFLOW || IDX_MAX < w ? 0 : w;
}
break;
case 'i':

View File

@ -91,9 +91,6 @@ sub gen_tests($)
['wrap-bad-3', '-w-1', {IN=>''}, {OUT=>""},
{ERR_SUBST => 's/base..:/base..:/'},
{ERR => "base..: invalid wrap size: '-1'\n"}, {EXIT => 1}],
['wrap-bad-4', '-w-0', {IN=>''}, {OUT=>""},
{ERR_SUBST => 's/base..:/base..:/'},
{ERR => "base..: invalid wrap size: '-0'\n"}, {EXIT => 1}],
['buf-1', '--decode', {IN=>&$enc(1)}, {OUT=>'a' x 1}],
['buf-2', '--decode', {IN=>&$enc(2)}, {OUT=>'a' x 2}],