fileinfo: Port libmagic 5.40

Signed-off-by: Anatol Belski <ab@php.net>
This commit is contained in:
Anatol Belski 2021-04-01 20:15:45 +02:00
parent 22019a1edd
commit 3b9173dc8f
21 changed files with 174944 additions and 148884 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -34,11 +34,10 @@
#include "file.h" #include "file.h"
#ifndef lint #ifndef lint
FILE_RCSID("@(#)$File: apprentice.c,v 1.297 2020/05/09 18:57:15 christos Exp $") FILE_RCSID("@(#)$File: apprentice.c,v 1.301 2021/02/23 00:51:11 christos Exp $")
#endif /* lint */ #endif /* lint */
#include "magic.h" #include "magic.h"
#include "patchlevel.h"
#include <stdlib.h> #include <stdlib.h>
#if defined(__hpux) && !defined(HAVE_STRTOULL) #if defined(__hpux) && !defined(HAVE_STRTOULL)
@ -536,6 +535,7 @@ file_ms_alloc(int flags)
ms->elf_notes_max = FILE_ELF_NOTES_MAX; ms->elf_notes_max = FILE_ELF_NOTES_MAX;
ms->regex_max = FILE_REGEX_MAX; ms->regex_max = FILE_REGEX_MAX;
ms->bytes_max = FILE_BYTES_MAX; ms->bytes_max = FILE_BYTES_MAX;
ms->encoding_max = FILE_ENCODING_MAX;
return ms; return ms;
free: free:
efree(ms); efree(ms);
@ -1416,7 +1416,10 @@ apprentice_load(struct magic_set *ms, const char *fn, int action)
*/ */
set_last_default(ms, mset[j].me, mset[j].count); set_last_default(ms, mset[j].me, mset[j].count);
/* coalesce per file arrays into a single one */ /* coalesce per file arrays into a single one, if needed */
if (mset[j].count == 0)
continue;
if (coalesce_entries(ms, mset[j].me, mset[j].count, if (coalesce_entries(ms, mset[j].me, mset[j].count,
&map->magic[j], &map->nmagic[j]) == -1) { &map->magic[j], &map->nmagic[j]) == -1) {
errs++; errs++;
@ -2086,6 +2089,13 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line,
return -1; return -1;
} }
if (m->type == FILE_NAME && cont_level != 0) {
if (ms->flags & MAGIC_CHECK)
file_magwarn(ms, "`name%s' entries can only be "
"declared at top level", l);
return -1;
}
/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
@ -2699,7 +2709,7 @@ getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
ull = CAST(uint64_t, strtoull(*p, &ep, 0)); ull = CAST(uint64_t, strtoull(*p, &ep, 0));
m->value.q = file_signextend(ms, m, ull); m->value.q = file_signextend(ms, m, ull);
if (*p == ep) { if (*p == ep) {
file_magwarn(ms, "Unparseable number `%s'", *p); file_magwarn(ms, "Unparsable number `%s'", *p);
} else { } else {
size_t ts = typesize(m->type); size_t ts = typesize(m->type);
uint64_t x; uint64_t x;
@ -3101,8 +3111,8 @@ internal_loaded:
else else
version = ptr[1]; version = ptr[1];
if (version != VERSIONNO) { if (version != VERSIONNO) {
file_error(ms, 0, "File %d.%d supports only version %d magic " file_error(ms, 0, "File %d supports only version %d magic "
"files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel, "files. `%s' is version %d", MAGIC_VERSION,
VERSIONNO, dbname, version); VERSIONNO, dbname, version);
goto error; goto error;
} }

View File

@ -35,7 +35,7 @@
#include "file.h" #include "file.h"
#ifndef lint #ifndef lint
FILE_RCSID("@(#)$File: ascmagic.c,v 1.107 2020/06/08 19:58:36 christos Exp $") FILE_RCSID("@(#)$File: ascmagic.c,v 1.109 2021/02/05 23:01:40 christos Exp $")
#endif /* lint */ #endif /* lint */
#include "magic.h" #include "magic.h"
@ -50,7 +50,8 @@ FILE_RCSID("@(#)$File: ascmagic.c,v 1.107 2020/06/08 19:58:36 christos Exp $")
#define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \ #define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
|| (x) == 0x85 || (x) == '\f') || (x) == 0x85 || (x) == '\f')
private unsigned char *encode_utf8(unsigned char *, size_t, unicodechar *, size_t); private unsigned char *encode_utf8(unsigned char *, size_t, file_unichar_t *,
size_t);
private size_t trim_nuls(const unsigned char *, size_t); private size_t trim_nuls(const unsigned char *, size_t);
/* /*
@ -69,7 +70,7 @@ trim_nuls(const unsigned char *buf, size_t nbytes)
protected int protected int
file_ascmagic(struct magic_set *ms, const struct buffer *b, int text) file_ascmagic(struct magic_set *ms, const struct buffer *b, int text)
{ {
unicodechar *ubuf = NULL; file_unichar_t *ubuf = NULL;
size_t ulen = 0; size_t ulen = 0;
int rv = 1; int rv = 1;
struct buffer bb; struct buffer bb;
@ -101,9 +102,9 @@ file_ascmagic(struct magic_set *ms, const struct buffer *b, int text)
} }
protected int protected int
file_ascmagic_with_encoding(struct magic_set *ms, file_ascmagic_with_encoding(struct magic_set *ms, const struct buffer *b,
const struct buffer *b, unicodechar *ubuf, size_t ulen, const char *code, file_unichar_t *ubuf, size_t ulen, const char *code, const char *type,
const char *type, int text) int text)
{ {
struct buffer bb; struct buffer bb;
const unsigned char *buf = CAST(const unsigned char *, b->fbuf); const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
@ -127,7 +128,7 @@ file_ascmagic_with_encoding(struct magic_set *ms,
int executable = 0; int executable = 0;
size_t last_line_end = CAST(size_t, -1); size_t last_line_end = CAST(size_t, -1);
int has_long_lines = 0; size_t has_long_lines = 0;
nbytes = trim_nuls(buf, nbytes); nbytes = trim_nuls(buf, nbytes);
@ -190,8 +191,11 @@ file_ascmagic_with_encoding(struct magic_set *ms,
} }
/* If this line is _longer_ than MAXLINELEN, remember it. */ /* If this line is _longer_ than MAXLINELEN, remember it. */
if (i > last_line_end + MAXLINELEN) if (i > last_line_end + MAXLINELEN) {
has_long_lines = 1; size_t ll = i - last_line_end;
if (ll > has_long_lines)
has_long_lines = ll;
}
if (ubuf[i] == '\033') if (ubuf[i] == '\033')
has_escapes = 1; has_escapes = 1;
@ -269,7 +273,8 @@ file_ascmagic_with_encoding(struct magic_set *ms,
goto done; goto done;
if (has_long_lines) if (has_long_lines)
if (file_printf(ms, ", with very long lines") == -1) if (file_printf(ms, ", with very long lines (%zu)",
has_long_lines) == -1)
goto done; goto done;
/* /*
@ -281,7 +286,8 @@ file_ascmagic_with_encoding(struct magic_set *ms,
if (file_printf(ms, ", with") == -1) if (file_printf(ms, ", with") == -1)
goto done; goto done;
if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) { if (n_crlf == 0 && n_cr == 0 &&
n_nel == 0 && n_lf == 0) {
if (file_printf(ms, " no") == -1) if (file_printf(ms, " no") == -1)
goto done; goto done;
} else { } else {
@ -335,7 +341,7 @@ done:
* after end of string, or NULL if an invalid character is found. * after end of string, or NULL if an invalid character is found.
*/ */
private unsigned char * private unsigned char *
encode_utf8(unsigned char *buf, size_t len, unicodechar *ubuf, size_t ulen) encode_utf8(unsigned char *buf, size_t len, file_unichar_t *ubuf, size_t ulen)
{ {
size_t i; size_t i;
unsigned char *end = buf + len; unsigned char *end = buf + len;
@ -345,43 +351,45 @@ encode_utf8(unsigned char *buf, size_t len, unicodechar *ubuf, size_t ulen)
if (end - buf < 1) if (end - buf < 1)
return NULL; return NULL;
*buf++ = CAST(unsigned char, ubuf[i]); *buf++ = CAST(unsigned char, ubuf[i]);
} else if (ubuf[i] <= 0x7ff) { continue;
}
if (ubuf[i] <= 0x7ff) {
if (end - buf < 2) if (end - buf < 2)
return NULL; return NULL;
*buf++ = CAST(unsigned char, (ubuf[i] >> 6) + 0xc0); *buf++ = CAST(unsigned char, (ubuf[i] >> 6) + 0xc0);
*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80); goto out1;
} else if (ubuf[i] <= 0xffff) { }
if (ubuf[i] <= 0xffff) {
if (end - buf < 3) if (end - buf < 3)
return NULL; return NULL;
*buf++ = CAST(unsigned char, (ubuf[i] >> 12) + 0xe0); *buf++ = CAST(unsigned char, (ubuf[i] >> 12) + 0xe0);
*buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80); goto out2;
*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80); }
} else if (ubuf[i] <= 0x1fffff) { if (ubuf[i] <= 0x1fffff) {
if (end - buf < 4) if (end - buf < 4)
return NULL; return NULL;
*buf++ = CAST(unsigned char, (ubuf[i] >> 18) + 0xf0); *buf++ = CAST(unsigned char, (ubuf[i] >> 18) + 0xf0);
*buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80); goto out3;
*buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80); }
*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80); if (ubuf[i] <= 0x3ffffff) {
} else if (ubuf[i] <= 0x3ffffff) {
if (end - buf < 5) if (end - buf < 5)
return NULL; return NULL;
*buf++ = CAST(unsigned char, (ubuf[i] >> 24) + 0xf8); *buf++ = CAST(unsigned char, (ubuf[i] >> 24) + 0xf8);
*buf++ = CAST(unsigned char, ((ubuf[i] >> 18) & 0x3f) + 0x80); goto out4;
*buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80); }
*buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80); if (ubuf[i] <= 0x7fffffff) {
*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
} else if (ubuf[i] <= 0x7fffffff) {
if (end - buf < 6) if (end - buf < 6)
return NULL; return NULL;
*buf++ = CAST(unsigned char, (ubuf[i] >> 30) + 0xfc); *buf++ = CAST(unsigned char, (ubuf[i] >> 30) + 0xfc);
*buf++ = CAST(unsigned char, ((ubuf[i] >> 24) & 0x3f) + 0x80); goto out5;
*buf++ = CAST(unsigned char, ((ubuf[i] >> 18) & 0x3f) + 0x80); }
*buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80); /* Invalid character */
*buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80); return NULL;
*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80); out5: *buf++ = CAST(unsigned char, ((ubuf[i] >> 24) & 0x3f) + 0x80);
} else /* Invalid character */ out4: *buf++ = CAST(unsigned char, ((ubuf[i] >> 18) & 0x3f) + 0x80);
return NULL; out3: *buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
out2: *buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80);
out1: *buf++ = CAST(unsigned char, ((ubuf[i] >> 0) & 0x3f) + 0x80);
} }
return buf; return buf;

View File

@ -35,7 +35,7 @@
#include "file.h" #include "file.h"
#ifndef lint #ifndef lint
FILE_RCSID("@(#)$File: compress.c,v 1.127 2020/05/31 00:11:06 christos Exp $") FILE_RCSID("@(#)$File: compress.c,v 1.129 2020/12/08 21:26:00 christos Exp $")
#endif #endif
#include "magic.h" #include "magic.h"
@ -72,7 +72,7 @@ typedef void (*sig_t)(int);
#include <bzlib.h> #include <bzlib.h>
#endif #endif
#if defined(HAVE_XZLIB_H) && defined(XZLIBSUPPORT) #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
#define BUILTIN_XZLIB #define BUILTIN_XZLIB
#include <lzma.h> #include <lzma.h>
#endif #endif
@ -847,8 +847,23 @@ uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
for (i = 0; i < __arraycount(fdp); i++) for (i = 0; i < __arraycount(fdp); i++)
fdp[i][0] = fdp[i][1] = -1; fdp[i][0] = fdp[i][1] = -1;
if ((fd == -1 && pipe(fdp[STDIN_FILENO]) == -1) || /*
pipe(fdp[STDOUT_FILENO]) == -1 || pipe(fdp[STDERR_FILENO]) == -1) { * There are multithreaded users who run magic_file()
* from dozens of threads. If two parallel magic_file() calls
* analyze two large compressed files, both will spawn
* an uncompressing child here, which writes out uncompressed data.
* We read some portion, then close the pipe, then waitpid() the child.
* If uncompressed data is larger, child should get EPIPE and exit.
* However, with *parallel* calls OTHER child may unintentionally
* inherit pipe fds, thus keeping pipe open and making writes in
* our child block instead of failing with EPIPE!
* (For the bug to occur, two threads must mutually inherit their pipes,
* and both must have large outputs. Thus it does not happen that often).
* To avoid this, be sure to create pipes with O_CLOEXEC.
*/
if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
closep(fdp[STDIN_FILENO]); closep(fdp[STDIN_FILENO]);
closep(fdp[STDOUT_FILENO]); closep(fdp[STDOUT_FILENO]);
return makeerror(newch, n, "Cannot create pipe, %s", return makeerror(newch, n, "Cannot create pipe, %s",
@ -879,16 +894,20 @@ uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
if (fdp[STDIN_FILENO][1] > 2) if (fdp[STDIN_FILENO][1] > 2)
(void) close(fdp[STDIN_FILENO][1]); (void) close(fdp[STDIN_FILENO][1]);
} }
file_clear_closexec(STDIN_FILENO);
///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
if (copydesc(STDOUT_FILENO, fdp[STDOUT_FILENO][1])) if (copydesc(STDOUT_FILENO, fdp[STDOUT_FILENO][1]))
(void) close(fdp[STDOUT_FILENO][1]); (void) close(fdp[STDOUT_FILENO][1]);
if (fdp[STDOUT_FILENO][0] > 2) if (fdp[STDOUT_FILENO][0] > 2)
(void) close(fdp[STDOUT_FILENO][0]); (void) close(fdp[STDOUT_FILENO][0]);
file_clear_closexec(STDOUT_FILENO);
if (copydesc(STDERR_FILENO, fdp[STDERR_FILENO][1])) if (copydesc(STDERR_FILENO, fdp[STDERR_FILENO][1]))
(void) close(fdp[STDERR_FILENO][1]); (void) close(fdp[STDERR_FILENO][1]);
if (fdp[STDERR_FILENO][0] > 2) if (fdp[STDERR_FILENO][0] > 2)
(void) close(fdp[STDERR_FILENO][0]); (void) close(fdp[STDERR_FILENO][0]);
file_clear_closexec(STDERR_FILENO);
(void)execvp(compr[method].argv[0], (void)execvp(compr[method].argv[0],
RCAST(char *const *, RCAST(intptr_t, compr[method].argv))); RCAST(char *const *, RCAST(intptr_t, compr[method].argv)));

View File

@ -1 +0,0 @@
#include "php.h"

View File

@ -35,7 +35,7 @@
#include "file.h" #include "file.h"
#ifndef lint #ifndef lint
FILE_RCSID("@(#)$File: der.c,v 1.20 2020/06/07 19:10:37 christos Exp $") FILE_RCSID("@(#)$File: der.c,v 1.21 2020/06/15 00:58:10 christos Exp $")
#endif #endif
#else #else
#define SIZE_T_FORMAT "z" #define SIZE_T_FORMAT "z"
@ -249,7 +249,6 @@ der_data(char *buf, size_t blen, uint32_t tag, const void *q, uint32_t len)
return snprintf(buf, blen, return snprintf(buf, blen,
"20%c%c-%c%c-%c%c %c%c:%c%c:%c%c GMT", d[0], d[1], d[2], "20%c%c-%c%c-%c%c %c%c:%c%c:%c%c GMT", d[0], d[1], d[2],
d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11]); d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11]);
break;
default: default:
break; break;
} }

View File

@ -35,7 +35,7 @@
#include "file.h" #include "file.h"
#ifndef lint #ifndef lint
FILE_RCSID("@(#)$File: encoding.c,v 1.21 2019/06/08 20:49:14 christos Exp $") FILE_RCSID("@(#)$File: encoding.c,v 1.27 2021/02/05 21:33:49 christos Exp $")
#endif /* lint */ #endif /* lint */
#include "magic.h" #include "magic.h"
@ -43,14 +43,20 @@ FILE_RCSID("@(#)$File: encoding.c,v 1.21 2019/06/08 20:49:14 christos Exp $")
#include <stdlib.h> #include <stdlib.h>
private int looks_ascii(const unsigned char *, size_t, unicodechar *, size_t *); private int looks_ascii(const unsigned char *, size_t, file_unichar_t *,
private int looks_utf8_with_BOM(const unsigned char *, size_t, unicodechar *, size_t *);
private int looks_utf8_with_BOM(const unsigned char *, size_t, file_unichar_t *,
size_t *);
private int looks_utf7(const unsigned char *, size_t, file_unichar_t *,
size_t *);
private int looks_ucs16(const unsigned char *, size_t, file_unichar_t *,
size_t *);
private int looks_ucs32(const unsigned char *, size_t, file_unichar_t *,
size_t *);
private int looks_latin1(const unsigned char *, size_t, file_unichar_t *,
size_t *);
private int looks_extended(const unsigned char *, size_t, file_unichar_t *,
size_t *); size_t *);
private int looks_utf7(const unsigned char *, size_t, unicodechar *, size_t *);
private int looks_ucs16(const unsigned char *, size_t, unicodechar *, size_t *);
private int looks_ucs32(const unsigned char *, size_t, unicodechar *, size_t *);
private int looks_latin1(const unsigned char *, size_t, unicodechar *, size_t *);
private int looks_extended(const unsigned char *, size_t, unicodechar *, size_t *);
private void from_ebcdic(const unsigned char *, size_t, unsigned char *); private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
#ifdef DEBUG_ENCODING #ifdef DEBUG_ENCODING
@ -62,19 +68,20 @@ private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
/* /*
* Try to determine whether text is in some character code we can * Try to determine whether text is in some character code we can
* identify. Each of these tests, if it succeeds, will leave * identify. Each of these tests, if it succeeds, will leave
* the text converted into one-unicodechar-per-character Unicode in * the text converted into one-file_unichar_t-per-character Unicode in
* ubuf, and the number of characters converted in ulen. * ubuf, and the number of characters converted in ulen.
*/ */
protected int protected int
file_encoding(struct magic_set *ms, const struct buffer *b, unicodechar **ubuf, file_encoding(struct magic_set *ms, const struct buffer *b,
size_t *ulen, const char **code, const char **code_mime, const char **type) file_unichar_t **ubuf, size_t *ulen, const char **code,
const char **code_mime, const char **type)
{ {
const unsigned char *buf = CAST(const unsigned char *, b->fbuf); const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
size_t nbytes = b->flen; size_t nbytes = b->flen;
size_t mlen; size_t mlen;
int rv = 1, ucs_type; int rv = 1, ucs_type;
unsigned char *nbuf = NULL; unsigned char *nbuf = NULL;
unicodechar *udefbuf; file_unichar_t *udefbuf;
size_t udeflen; size_t udeflen;
if (ubuf == NULL) if (ubuf == NULL)
@ -87,8 +94,12 @@ file_encoding(struct magic_set *ms, const struct buffer *b, unicodechar **ubuf,
*code = "unknown"; *code = "unknown";
*code_mime = "binary"; *code_mime = "binary";
if (nbytes > ms->encoding_max)
nbytes = ms->encoding_max;
mlen = (nbytes + 1) * sizeof((*ubuf)[0]); mlen = (nbytes + 1) * sizeof((*ubuf)[0]);
if ((*ubuf = CAST(unicodechar *, ecalloc(CAST(size_t, 1), mlen))) == NULL) { *ubuf = CAST(file_unichar_t *, ecalloc(CAST(size_t, 1), mlen));
if (*ubuf == NULL) {
file_oomem(ms, mlen); file_oomem(ms, mlen);
goto done; goto done;
} }
@ -102,7 +113,7 @@ file_encoding(struct magic_set *ms, const struct buffer *b, unicodechar **ubuf,
if (looks_ascii(buf, nbytes, *ubuf, ulen)) { if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
if (looks_utf7(buf, nbytes, *ubuf, ulen) > 0) { if (looks_utf7(buf, nbytes, *ubuf, ulen) > 0) {
DPRINTF(("utf-7 %" SIZE_T_FORMAT "u\n", *ulen)); DPRINTF(("utf-7 %" SIZE_T_FORMAT "u\n", *ulen));
*code = "UTF-7 Unicode"; *code = "Unicode text, UTF-7";
*code_mime = "utf-7"; *code_mime = "utf-7";
} else { } else {
DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen)); DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
@ -111,27 +122,27 @@ file_encoding(struct magic_set *ms, const struct buffer *b, unicodechar **ubuf,
} }
} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) { } else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen)); DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen));
*code = "UTF-8 Unicode (with BOM)"; *code = "Unicode text, UTF-8 (with BOM)";
*code_mime = "utf-8"; *code_mime = "utf-8";
} else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) { } else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
DPRINTF(("utf8 %" SIZE_T_FORMAT "u\n", *ulen)); DPRINTF(("utf8 %" SIZE_T_FORMAT "u\n", *ulen));
*code = "UTF-8 Unicode"; *code = "Unicode text, UTF-8";
*code_mime = "utf-8"; *code_mime = "utf-8";
} else if ((ucs_type = looks_ucs32(buf, nbytes, *ubuf, ulen)) != 0) { } else if ((ucs_type = looks_ucs32(buf, nbytes, *ubuf, ulen)) != 0) {
if (ucs_type == 1) { if (ucs_type == 1) {
*code = "Little-endian UTF-32 Unicode"; *code = "Unicode text, UTF-32, little-endian";
*code_mime = "utf-32le"; *code_mime = "utf-32le";
} else { } else {
*code = "Big-endian UTF-32 Unicode"; *code = "Unicode text, UTF-32, big-endian";
*code_mime = "utf-32be"; *code_mime = "utf-32be";
} }
DPRINTF(("ucs32 %" SIZE_T_FORMAT "u\n", *ulen)); DPRINTF(("ucs32 %" SIZE_T_FORMAT "u\n", *ulen));
} else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) { } else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
if (ucs_type == 1) { if (ucs_type == 1) {
*code = "Little-endian UTF-16 Unicode"; *code = "Unicode text, UTF-16, little-endian";
*code_mime = "utf-16le"; *code_mime = "utf-16le";
} else { } else {
*code = "Big-endian UTF-16 Unicode"; *code = "Unicode text, UTF-16, big-endian";
*code_mime = "utf-16be"; *code_mime = "utf-16be";
} }
DPRINTF(("ucs16 %" SIZE_T_FORMAT "u\n", *ulen)); DPRINTF(("ucs16 %" SIZE_T_FORMAT "u\n", *ulen));
@ -249,64 +260,40 @@ private char text_chars[256] = {
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
}; };
private int #define LOOKS(NAME, COND) \
looks_ascii(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, private int \
size_t *ulen) looks_ ## NAME(const unsigned char *buf, size_t nbytes, file_unichar_t *ubuf, \
{ size_t *ulen) \
size_t i; { \
size_t i, u; \
*ulen = 0; unsigned char dist[256]; \
memset(dist, 0, sizeof(dist)); \
for (i = 0; i < nbytes; i++) { \
int t = text_chars[buf[i]]; *ulen = 0; \
\
if (t != T) for (i = 0; i < nbytes; i++) { \
return 0; int t = text_chars[buf[i]]; \
\
ubuf[(*ulen)++] = buf[i]; if (COND) \
} return 0; \
\
return 1; ubuf[(*ulen)++] = buf[i]; \
dist[buf[i]]++; \
} \
u = 0; \
for (i = 0; i < __arraycount(dist); i++) { \
if (dist[i]) \
u += dist[i]; \
} \
if (u < 3) \
return 0; \
\
return 1; \
} }
private int LOOKS(ascii, t != T)
looks_latin1(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size_t *ulen) LOOKS(latin1, t != T && t != I)
{ LOOKS(extended, t != T && t != I && t != X)
size_t i;
*ulen = 0;
for (i = 0; i < nbytes; i++) {
int t = text_chars[buf[i]];
if (t != T && t != I)
return 0;
ubuf[(*ulen)++] = buf[i];
}
return 1;
}
private int
looks_extended(const unsigned char *buf, size_t nbytes, unicodechar *ubuf,
size_t *ulen)
{
size_t i;
*ulen = 0;
for (i = 0; i < nbytes; i++) {
int t = text_chars[buf[i]];
if (t != T && t != I && t != X)
return 0;
ubuf[(*ulen)++] = buf[i];
}
return 1;
}
/* /*
* Decide whether some text looks like UTF-8. Returns: * Decide whether some text looks like UTF-8. Returns:
@ -319,12 +306,65 @@ looks_extended(const unsigned char *buf, size_t nbytes, unicodechar *ubuf,
* If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen; * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen;
* ubuf must be big enough! * ubuf must be big enough!
*/ */
// from: https://golang.org/src/unicode/utf8/utf8.go
#define XX 0xF1 // invalid: size 1
#define AS 0xF0 // ASCII: size 1
#define S1 0x02 // accept 0, size 2
#define S2 0x13 // accept 1, size 3
#define S3 0x03 // accept 0, size 3
#define S4 0x23 // accept 2, size 3
#define S5 0x34 // accept 3, size 4
#define S6 0x04 // accept 0, size 4
#define S7 0x44 // accept 4, size 4
#define LOCB 0x80
#define HICB 0xBF
// first[] holds information about the leading byte of a UTF-8 sequence,
// indexed by that byte's value (256 entries, one row of 16 per line below).
// Each entry is one of the XX/AS/S1..S7 codes defined above. For the S1..S7
// codes the high nibble is the index into accept_ranges (the permitted range
// for the second byte) and the low nibble is the total sequence size in bytes.
// XX marks a byte that cannot start a sequence; AS marks a plain ASCII byte.
static const uint8_t first[] = {
// 1 2 3 4 5 6 7 8 9 A B C D E F
AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x00-0x0F
AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x10-0x1F
AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x20-0x2F
AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x30-0x3F
AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x40-0x4F
AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x50-0x5F
AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x60-0x6F
AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x70-0x7F
// 1 2 3 4 5 6 7 8 9 A B C D E F
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x80-0x8F
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x90-0x9F
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xA0-0xAF
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xB0-0xBF
XX, XX, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, // 0xC0-0xCF
S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, // 0xD0-0xDF
S2, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S4, S3, S3, // 0xE0-0xEF
S5, S6, S6, S6, S7, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xF0-0xFF
};
// acceptRange gives the range of valid values for the second byte in a UTF-8
// sequence.
// accept_ranges[k] is the inclusive range of valid values for the second
// byte of a UTF-8 sequence, where k is the high nibble of the lead byte's
// code in first[]. Only entries 0-4 are populated; the remaining entries
// are zero-initialized.
//
// The table is declared static const: it is lookup data that is never
// written, and file_looks_utf8() already reads it through a pointer to
// const, so it should neither be mutable nor exported as a global symbol.
static const struct accept_range {
	uint8_t lo; // lowest value for second byte.
	uint8_t hi; // highest value for second byte.
} accept_ranges[16] = {
// acceptRanges has size 16 to avoid bounds checks in the code that uses it.
	{ LOCB, HICB },
	{ 0xA0, HICB },
	{ LOCB, 0x9F },
	{ 0x90, HICB },
	{ LOCB, 0x8F },
};
protected int protected int
file_looks_utf8(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size_t *ulen) file_looks_utf8(const unsigned char *buf, size_t nbytes, file_unichar_t *ubuf,
size_t *ulen)
{ {
size_t i; size_t i;
int n; int n;
unicodechar c; file_unichar_t c;
int gotone = 0, ctrl = 0; int gotone = 0, ctrl = 0;
if (ubuf) if (ubuf)
@ -346,6 +386,10 @@ file_looks_utf8(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size
return -1; return -1;
} else { /* 11xxxxxx begins UTF-8 */ } else { /* 11xxxxxx begins UTF-8 */
int following; int following;
uint8_t x = first[buf[i]];
const struct accept_range *ar = &accept_ranges[x >> 4];
if (x == XX)
return -1;
if ((buf[i] & 0x20) == 0) { /* 110xxxxx */ if ((buf[i] & 0x20) == 0) { /* 110xxxxx */
c = buf[i] & 0x1f; c = buf[i] & 0x1f;
@ -370,6 +414,10 @@ file_looks_utf8(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size
if (i >= nbytes) if (i >= nbytes)
goto done; goto done;
if (n == 0 &&
(buf[i] < ar->lo || buf[i] > ar->hi))
return -1;
if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40)) if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40))
return -1; return -1;
@ -391,8 +439,8 @@ done:
* rest of the text. * rest of the text.
*/ */
private int private int
looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes,
size_t *ulen) file_unichar_t *ubuf, size_t *ulen)
{ {
if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf) if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf)
return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen); return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
@ -401,7 +449,8 @@ looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unicodechar *ubuf,
} }
private int private int
looks_utf7(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size_t *ulen) looks_utf7(const unsigned char *buf, size_t nbytes, file_unichar_t *ubuf,
size_t *ulen)
{ {
if (nbytes > 4 && buf[0] == '+' && buf[1] == '/' && buf[2] == 'v') if (nbytes > 4 && buf[0] == '+' && buf[1] == '/' && buf[2] == 'v')
switch (buf[3]) { switch (buf[3]) {
@ -420,7 +469,7 @@ looks_utf7(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size_t *u
} }
private int private int
looks_ucs16(const unsigned char *bf, size_t nbytes, unicodechar *ubf, looks_ucs16(const unsigned char *bf, size_t nbytes, file_unichar_t *ubf,
size_t *ulen) size_t *ulen)
{ {
int bigend; int bigend;
@ -443,10 +492,10 @@ looks_ucs16(const unsigned char *bf, size_t nbytes, unicodechar *ubf,
if (bigend) if (bigend)
ubf[(*ulen)++] = bf[i + 1] ubf[(*ulen)++] = bf[i + 1]
| (CAST(unicodechar, bf[i]) << 8); | (CAST(file_unichar_t, bf[i]) << 8);
else else
ubf[(*ulen)++] = bf[i] ubf[(*ulen)++] = bf[i]
| (CAST(unicodechar, bf[i + 1]) << 8); | (CAST(file_unichar_t, bf[i + 1]) << 8);
if (ubf[*ulen - 1] == 0xfffe) if (ubf[*ulen - 1] == 0xfffe)
return 0; return 0;
@ -459,7 +508,7 @@ looks_ucs16(const unsigned char *bf, size_t nbytes, unicodechar *ubf,
} }
private int private int
looks_ucs32(const unsigned char *bf, size_t nbytes, unicodechar *ubf, looks_ucs32(const unsigned char *bf, size_t nbytes, file_unichar_t *ubf,
size_t *ulen) size_t *ulen)
{ {
int bigend; int bigend;
@ -481,15 +530,15 @@ looks_ucs32(const unsigned char *bf, size_t nbytes, unicodechar *ubf,
/* XXX fix to properly handle chars > 65536 */ /* XXX fix to properly handle chars > 65536 */
if (bigend) if (bigend)
ubf[(*ulen)++] = CAST(unicodechar, bf[i + 3]) ubf[(*ulen)++] = CAST(file_unichar_t, bf[i + 3])
| (CAST(unicodechar, bf[i + 2]) << 8) | (CAST(file_unichar_t, bf[i + 2]) << 8)
| (CAST(unicodechar, bf[i + 1]) << 16) | (CAST(file_unichar_t, bf[i + 1]) << 16)
| (CAST(unicodechar, bf[i]) << 24); | (CAST(file_unichar_t, bf[i]) << 24);
else else
ubf[(*ulen)++] = CAST(unicodechar, bf[i + 0]) ubf[(*ulen)++] = CAST(file_unichar_t, bf[i + 0])
| (CAST(unicodechar, bf[i + 1]) << 8) | (CAST(file_unichar_t, bf[i + 1]) << 8)
| (CAST(unicodechar, bf[i + 2]) << 16) | (CAST(file_unichar_t, bf[i + 2]) << 16)
| (CAST(unicodechar, bf[i + 3]) << 24); | (CAST(file_unichar_t, bf[i + 3]) << 24);
if (ubf[*ulen - 1] == 0xfffe) if (ubf[*ulen - 1] == 0xfffe)
return 0; return 0;

View File

@ -27,7 +27,7 @@
*/ */
/* /*
* file.h - definitions for file(1) program * file.h - definitions for file(1) program
* @(#)$File: file.h,v 1.220 2020/06/08 17:38:27 christos Exp $ * @(#)$File: file.h,v 1.225 2021/02/05 22:29:07 christos Exp $
*/ */
#ifndef __file_h__ #ifndef __file_h__
@ -35,6 +35,7 @@
#include "config.h" #include "config.h"
#include "php.h"
#include "ext/standard/php_string.h" #include "ext/standard/php_string.h"
#include "ext/pcre/php_pcre.h" #include "ext/pcre/php_pcre.h"
@ -136,6 +137,14 @@
#define MAX(a,b) (((a) > (b)) ? (a) : (b)) #define MAX(a,b) (((a) > (b)) ? (a) : (b))
#endif #endif
#ifndef O_CLOEXEC
# define O_CLOEXEC 0
#endif
#ifndef FD_CLOEXEC
# define FD_CLOEXEC 1
#endif
#define FILE_BADSIZE CAST(size_t, ~0ul) #define FILE_BADSIZE CAST(size_t, ~0ul)
#define MAXDESC 64 /* max len of text description/MIME type */ #define MAXDESC 64 /* max len of text description/MIME type */
#define MAXMIME 80 /* max len of text MIME type */ #define MAXMIME 80 /* max len of text MIME type */
@ -403,14 +412,16 @@ struct level_info {
#endif #endif
}; };
struct cont {
size_t len;
struct level_info *li;
};
#define MAGIC_SETS 2 #define MAGIC_SETS 2
struct magic_set { struct magic_set {
struct mlist *mlist[MAGIC_SETS]; /* list of regular entries */ struct mlist *mlist[MAGIC_SETS]; /* list of regular entries */
struct cont { struct cont c;
size_t len;
struct level_info *li;
} c;
struct out { struct out {
char *buf; /* Accumulation buffer */ char *buf; /* Accumulation buffer */
size_t blen; /* Length of buffer */ size_t blen; /* Length of buffer */
@ -445,6 +456,7 @@ struct magic_set {
uint16_t elf_notes_max; uint16_t elf_notes_max;
uint16_t regex_max; uint16_t regex_max;
size_t bytes_max; /* number of bytes to read from file */ size_t bytes_max; /* number of bytes to read from file */
size_t encoding_max; /* bytes to look for encoding */
#ifndef FILE_BYTES_MAX #ifndef FILE_BYTES_MAX
# define FILE_BYTES_MAX (1024 * 1024) /* how much of the file to look at */ # define FILE_BYTES_MAX (1024 * 1024) /* how much of the file to look at */
#endif #endif
@ -454,11 +466,13 @@ struct magic_set {
#define FILE_INDIR_MAX 50 #define FILE_INDIR_MAX 50
#define FILE_NAME_MAX 50 #define FILE_NAME_MAX 50
#define FILE_REGEX_MAX 8192 #define FILE_REGEX_MAX 8192
#define FILE_ENCODING_MAX (64 * 1024)
}; };
/* Type for Unicode characters */ /* Type for Unicode characters */
typedef unsigned long unicodechar; typedef unsigned long file_unichar_t;
struct stat;
#define FILE_T_LOCAL 1 #define FILE_T_LOCAL 1
#define FILE_T_WINDOWS 2 #define FILE_T_WINDOWS 2
protected const char *file_fmttime(char *, size_t, uint64_t, int); protected const char *file_fmttime(char *, size_t, uint64_t, int);
@ -468,6 +482,8 @@ protected int file_buffer(struct magic_set *, php_stream *, zend_stat_t *, const
size_t); size_t);
protected int file_fsmagic(struct magic_set *, const char *, zend_stat_t *); protected int file_fsmagic(struct magic_set *, const char *, zend_stat_t *);
protected int file_pipe2file(struct magic_set *, int, const void *, size_t); protected int file_pipe2file(struct magic_set *, int, const void *, size_t);
protected int file_vprintf(struct magic_set *, const char *, va_list)
__attribute__((__format__(__printf__, 2, 0)));
protected int file_separator(struct magic_set *); protected int file_separator(struct magic_set *);
protected char *file_copystr(char *, size_t, size_t, const char *); protected char *file_copystr(char *, size_t, size_t, const char *);
protected int file_checkfmt(char *, size_t, const char *); protected int file_checkfmt(char *, size_t, const char *);
@ -486,15 +502,17 @@ protected int file_zmagic(struct magic_set *, const struct buffer *,
protected int file_ascmagic(struct magic_set *, const struct buffer *, protected int file_ascmagic(struct magic_set *, const struct buffer *,
int); int);
protected int file_ascmagic_with_encoding(struct magic_set *, protected int file_ascmagic_with_encoding(struct magic_set *,
const struct buffer *, unicodechar *, size_t, const char *, const char *, int); const struct buffer *, file_unichar_t *, size_t, const char *, const char *, int);
protected int file_encoding(struct magic_set *, const struct buffer *, protected int file_encoding(struct magic_set *, const struct buffer *,
unicodechar **, size_t *, const char **, const char **, const char **); file_unichar_t **, size_t *, const char **, const char **, const char **);
protected int file_is_json(struct magic_set *, const struct buffer *); protected int file_is_json(struct magic_set *, const struct buffer *);
protected int file_is_csv(struct magic_set *, const struct buffer *, int); protected int file_is_csv(struct magic_set *, const struct buffer *, int);
protected int file_is_tar(struct magic_set *, const struct buffer *); protected int file_is_tar(struct magic_set *, const struct buffer *);
protected int file_softmagic(struct magic_set *, const struct buffer *, protected int file_softmagic(struct magic_set *, const struct buffer *,
uint16_t *, uint16_t *, int, int); uint16_t *, uint16_t *, int, int);
protected int file_apprentice(struct magic_set *, const char *, int); protected int file_apprentice(struct magic_set *, const char *, int);
protected int buffer_apprentice(struct magic_set *, struct magic **,
size_t *, size_t);
protected int file_magicfind(struct magic_set *, const char *, struct mlist *); protected int file_magicfind(struct magic_set *, const char *, struct mlist *);
protected uint64_t file_signextend(struct magic_set *, struct magic *, protected uint64_t file_signextend(struct magic_set *, struct magic *,
uint64_t); uint64_t);
@ -510,7 +528,7 @@ protected size_t file_mbswidth(const char *);
protected const char *file_getbuffer(struct magic_set *); protected const char *file_getbuffer(struct magic_set *);
protected ssize_t sread(int, void *, size_t, int); protected ssize_t sread(int, void *, size_t, int);
protected int file_check_mem(struct magic_set *, unsigned int); protected int file_check_mem(struct magic_set *, unsigned int);
protected int file_looks_utf8(const unsigned char *, size_t, unicodechar *, protected int file_looks_utf8(const unsigned char *, size_t, file_unichar_t *,
size_t *); size_t *);
protected size_t file_pstring_length_size(struct magic_set *, protected size_t file_pstring_length_size(struct magic_set *,
const struct magic *); const struct magic *);
@ -521,6 +539,9 @@ protected char * file_printable(char *, size_t, const char *, size_t);
protected int file_os2_apptype(struct magic_set *, const char *, const void *, protected int file_os2_apptype(struct magic_set *, const char *, const void *,
size_t); size_t);
#endif /* __EMX__ */ #endif /* __EMX__ */
protected int file_pipe_closexec(int *);
protected int file_clear_closexec(int);
protected char *file_strtrim(char *);
protected void buffer_init(struct buffer *, int, const zend_stat_t *, protected void buffer_init(struct buffer *, int, const zend_stat_t *,
const void *, size_t); const void *, size_t);

View File

@ -27,7 +27,7 @@
#include "file.h" #include "file.h"
#ifndef lint #ifndef lint
FILE_RCSID("@(#)$File: funcs.c,v 1.115 2020/02/20 15:50:20 christos Exp $") FILE_RCSID("@(#)$File: funcs.c,v 1.121 2021/02/05 22:29:07 christos Exp $")
#endif /* lint */ #endif /* lint */
#include "magic.h" #include "magic.h"
@ -36,6 +36,9 @@ FILE_RCSID("@(#)$File: funcs.c,v 1.115 2020/02/20 15:50:20 christos Exp $")
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <ctype.h> #include <ctype.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h> /* for pipe2() */
#endif
#if defined(HAVE_WCHAR_H) #if defined(HAVE_WCHAR_H)
#include <wchar.h> #include <wchar.h>
#endif #endif
@ -100,7 +103,7 @@ file_checkfmt(char *msg, size_t mlen, const char *fmt)
if (*++p == '%') if (*++p == '%')
continue; continue;
// Skip uninteresting. // Skip uninteresting.
while (strchr("0.'+- ", *p) != NULL) while (strchr("#0.'+- ", *p) != NULL)
p++; p++;
if (*p == '*') { if (*p == '*') {
if (msg) if (msg)
@ -126,27 +129,56 @@ file_checkfmt(char *msg, size_t mlen, const char *fmt)
return 0; return 0;
} }
/*
 * Like printf, only we append to a buffer.
 *
 * Formats fmt/ap and appends the result to ms->o.buf, storing the length of
 * the accumulated text in ms->o.blen.
 *
 * Returns 0 on success.  Also returns 0, without appending anything, when an
 * error has already been recorded (EVENT_HAD_ERR is set in ms->event_flags).
 * Returns -1, after clearing the buffer and reporting through file_error(),
 * when file_checkfmt() rejects fmt or when the size limits below are
 * exceeded.
 */
protected int
file_vprintf(struct magic_set *ms, const char *fmt, va_list ap)
{
	size_t len;
	char *buf, *newstr;
	char tbuf[1024];

	if (ms->event_flags & EVENT_HAD_ERR)
		return 0;

	if (file_checkfmt(tbuf, sizeof(tbuf), fmt)) {
		file_clearbuf(ms);
		file_error(ms, 0, "Bad magic format `%s' (%s)", fmt, tbuf);
		return -1;
	}

	len = vspprintf(&buf, 0, fmt, ap);

	/* Limit one formatted chunk to 1024 bytes and the total to 1 MiB. */
	if (len > 1024 || len + ms->o.blen > 1024 * 1024) {
		/*
		 * Read the length before file_clearbuf(); presumably that
		 * call resets ms->o -- confirm against its definition.
		 */
		size_t blen = ms->o.blen;
		if (buf) efree(buf);
		file_clearbuf(ms);
		/* Both values are size_t, so both conversions must be %zu. */
		file_error(ms, 0, "Output buffer space exceeded %zu+%zu", len,
		    blen);
		return -1;
	}

	if (ms->o.buf != NULL) {
		/* Concatenate; len becomes the length of the combined text. */
		len = spprintf(&newstr, 0, "%s%s", ms->o.buf, buf);
		efree(buf);
		efree(ms->o.buf);
		buf = newstr;
	}
	ms->o.buf = buf;
	ms->o.blen = len;
	return 0;
}
protected int protected int
file_printf(struct magic_set *ms, const char *fmt, ...) file_printf(struct magic_set *ms, const char *fmt, ...)
{ {
int rv;
va_list ap; va_list ap;
char *buf = NULL, *newstr;
va_start(ap, fmt); va_start(ap, fmt);
vspprintf(&buf, 0, fmt, ap); rv = file_vprintf(ms, fmt, ap);
va_end(ap); va_end(ap);
return rv;
if (ms->o.buf != NULL) {
spprintf(&newstr, 0, "%s%s", ms->o.buf, (buf ? buf : ""));
if (buf) {
efree(buf);
}
efree(ms->o.buf);
ms->o.buf = newstr;
} else {
ms->o.buf = buf;
}
return 0;
} }
/* /*
@ -157,30 +189,18 @@ private void
file_error_core(struct magic_set *ms, int error, const char *f, va_list va, file_error_core(struct magic_set *ms, int error, const char *f, va_list va,
size_t lineno) size_t lineno)
{ {
char *buf = NULL;
/* Only the first error is ok */ /* Only the first error is ok */
if (ms->event_flags & EVENT_HAD_ERR) if (ms->event_flags & EVENT_HAD_ERR)
return; return;
if (lineno != 0) { if (lineno != 0) {
efree(ms->o.buf); file_clearbuf(ms);
ms->o.buf = NULL; (void)file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno);
file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno);
} }
if (ms->o.buf && *ms->o.buf)
vspprintf(&buf, 0, f, va); (void)file_printf(ms, " ");
va_end(va); (void)file_vprintf(ms, f, va);
if (error > 0)
if (error > 0) { (void)file_printf(ms, " (%s)", strerror(error));
file_printf(ms, "%s (%s)", (*buf ? buf : ""), strerror(error));
} else if (*buf) {
file_printf(ms, "%s", buf);
}
if (buf) {
efree(buf);
}
ms->event_flags |= EVENT_HAD_ERR; ms->event_flags |= EVENT_HAD_ERR;
ms->error = error; ms->error = error;
} }
@ -228,11 +248,31 @@ file_badread(struct magic_set *ms)
} }
#ifndef COMPILE_ONLY #ifndef COMPILE_ONLY
#define FILE_SEPARATOR "\n- "
protected int protected int
file_separator(struct magic_set *ms) file_separator(struct magic_set *ms)
{ {
return file_printf(ms, "\n- "); return file_printf(ms, FILE_SEPARATOR);
}
/*
 * Drop a trailing FILE_SEPARATOR from the output buffer, if there is one.
 * Nothing happens when the buffer is unset or is not strictly longer than
 * the separator itself.
 */
static void
trim_separator(struct magic_set *ms)
{
	const size_t seplen = sizeof(FILE_SEPARATOR) - 1;
	char *out = ms->o.buf;
	char *tail;
	size_t outlen;

	if (out == NULL)
		return;

	outlen = strlen(out);
	if (outlen <= seplen)
		return;

	/* tail points at the last seplen characters of the buffer */
	tail = out + (outlen - seplen);
	if (strcmp(tail, FILE_SEPARATOR) == 0)
		*tail = '\0';
}
static int static int
@ -450,6 +490,7 @@ simple:
rv = -1; rv = -1;
} }
done: done:
trim_separator(ms);
if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { if ((ms->flags & MAGIC_MIME_ENCODING) != 0) {
if (ms->flags & MAGIC_MIME_TYPE) if (ms->flags & MAGIC_MIME_TYPE)
if (file_printf(ms, "; charset=") == -1) if (file_printf(ms, "; charset=") == -1)
@ -598,7 +639,7 @@ file_check_mem(struct magic_set *ms, unsigned int level)
protected size_t protected size_t
file_printedlen(const struct magic_set *ms) file_printedlen(const struct magic_set *ms)
{ {
return ms->o.buf == NULL ? 0 : strlen(ms->o.buf); return ms->o.blen;
} }
protected int protected int
@ -717,7 +758,7 @@ struct guid {
protected int protected int
file_parse_guid(const char *s, uint64_t *guid) file_parse_guid(const char *s, uint64_t *guid)
{ {
struct guid *g = CAST(struct guid *, guid); struct guid *g = CAST(struct guid *, CAST(void *, guid));
return sscanf(s, return sscanf(s,
"%8x-%4hx-%4hx-%2hhx%2hhx-%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx", "%8x-%4hx-%4hx-%2hhx%2hhx-%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx",
&g->data1, &g->data2, &g->data3, &g->data4[0], &g->data4[1], &g->data1, &g->data2, &g->data3, &g->data4[0], &g->data4[1],
@ -728,7 +769,8 @@ file_parse_guid(const char *s, uint64_t *guid)
protected int protected int
file_print_guid(char *str, size_t len, const uint64_t *guid) file_print_guid(char *str, size_t len, const uint64_t *guid)
{ {
const struct guid *g = CAST(const struct guid *, guid); const struct guid *g = CAST(const struct guid *,
CAST(const void *, guid));
return snprintf(str, len, "%.8X-%.4hX-%.4hX-%.2hhX%.2hhX-" return snprintf(str, len, "%.8X-%.4hX-%.4hX-%.2hhX%.2hhX-"
"%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX", "%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX",
@ -736,3 +778,39 @@ file_print_guid(char *str, size_t len, const uint64_t *guid)
g->data4[2], g->data4[3], g->data4[4], g->data4[5], g->data4[2], g->data4[3], g->data4[4], g->data4[5],
g->data4[6], g->data4[7]); g->data4[6], g->data4[7]);
} }
/*
 * Create a pipe in fds[0]/fds[1] with close-on-exec requested on both ends.
 * Uses pipe2() when HAVE_PIPE2 is defined; otherwise pipe() followed by
 * fcntl(F_SETFD, FD_CLOEXEC) on each descriptor, whose results are ignored.
 * Returns -1 when the pipe cannot be created, otherwise the pipe2() result
 * or 0.
 */
protected int
file_pipe_closexec(int *fds)
{
#ifdef HAVE_PIPE2
	return pipe2(fds, O_CLOEXEC);
#else
	int i;

	if (pipe(fds) == -1)
		return -1;
	for (i = 0; i < 2; i++)
		(void)fcntl(fds[i], F_SETFD, FD_CLOEXEC);
	return 0;
#endif
}
/*
 * Set the descriptor flags of fd to 0 with fcntl(F_SETFD), which removes
 * FD_CLOEXEC.  Returns whatever fcntl() returns.
 */
protected int
file_clear_closexec(int fd) {
return fcntl(fd, F_SETFD, 0);
}
/*
 * Trim leading and trailing whitespace from str in place.
 *
 * Leading whitespace is skipped by advancing the returned pointer; trailing
 * whitespace is removed by writing a NUL after the last non-space character.
 * Returns a pointer into str at the first non-space character (or at the
 * terminating NUL when str is empty or consists only of whitespace).
 */
protected char *
file_strtrim(char *str)
{
	char *last;

	while (isspace(CAST(unsigned char, *str)))
		str++;
	last = str;
	while (*last)
		last++;
	/*
	 * Only scan backwards when something is left after skipping the
	 * leading whitespace.  Otherwise last == str and stepping back would
	 * read (and possibly write) before the start of the string.  When
	 * the remainder is non-empty, *str is not a space, so the backward
	 * scan always stops at or after str.
	 */
	if (last > str) {
		--last;
		while (isspace(CAST(unsigned char, *last)))
			last--;
		*++last = '\0';
	}
	return str;
}

View File

@ -32,7 +32,7 @@
#include "file.h" #include "file.h"
#ifndef lint #ifndef lint
FILE_RCSID("@(#)$File: is_csv.c,v 1.4 2019/06/26 20:31:31 christos Exp $") FILE_RCSID("@(#)$File: is_csv.c,v 1.6 2020/08/09 16:43:36 christos Exp $")
#endif #endif
#include <string.h> #include <string.h>
@ -94,8 +94,7 @@ csv_parse(const unsigned char *uc, const unsigned char *ue)
size_t nf = 0, tf = 0, nl = 0; size_t nf = 0, tf = 0, nl = 0;
while (uc < ue) { while (uc < ue) {
unsigned char c; switch (*uc++) {
switch (c = *uc++) {
case '"': case '"':
// Eat until the matching quote // Eat until the matching quote
uc = eatquote(uc, ue); uc = eatquote(uc, ue);
@ -150,7 +149,7 @@ file_is_csv(struct magic_set *ms, const struct buffer *b, int looks_text)
return 1; return 1;
if (mime) { if (mime) {
if (file_printf(ms, "application/csv") == -1) if (file_printf(ms, "text/csv") == -1)
return -1; return -1;
return 1; return 1;
} }

View File

@ -28,7 +28,7 @@
#include "file.h" #include "file.h"
#ifndef lint #ifndef lint
FILE_RCSID("@(#)$File: magic.c,v 1.112 2020/06/08 19:44:10 christos Exp $") FILE_RCSID("@(#)$File: magic.c,v 1.114 2021/02/05 21:33:49 christos Exp $")
#endif /* lint */ #endif /* lint */
#include "magic.h" #include "magic.h"
@ -348,6 +348,9 @@ magic_setparam(struct magic_set *ms, int param, const void *val)
case MAGIC_PARAM_BYTES_MAX: case MAGIC_PARAM_BYTES_MAX:
ms->bytes_max = *CAST(const size_t *, val); ms->bytes_max = *CAST(const size_t *, val);
return 0; return 0;
case MAGIC_PARAM_ENCODING_MAX:
ms->encoding_max = *CAST(const size_t *, val);
return 0;
default: default:
errno = EINVAL; errno = EINVAL;
return -1; return -1;
@ -381,6 +384,9 @@ magic_getparam(struct magic_set *ms, int param, void *val)
case MAGIC_PARAM_BYTES_MAX: case MAGIC_PARAM_BYTES_MAX:
*CAST(size_t *, val) = ms->bytes_max; *CAST(size_t *, val) = ms->bytes_max;
return 0; return 0;
case MAGIC_PARAM_ENCODING_MAX:
*CAST(size_t *, val) = ms->encoding_max;
return 0;
default: default:
errno = EINVAL; errno = EINVAL;
return -1; return -1;

View File

@ -113,7 +113,7 @@ b\31transp_compression\0\
#define MAGIC_NO_CHECK_FORTRAN 0x000000 /* Don't check ascii/fortran */ #define MAGIC_NO_CHECK_FORTRAN 0x000000 /* Don't check ascii/fortran */
#define MAGIC_NO_CHECK_TROFF 0x000000 /* Don't check ascii/troff */ #define MAGIC_NO_CHECK_TROFF 0x000000 /* Don't check ascii/troff */
#define MAGIC_VERSION 539 /* This implementation */ #define MAGIC_VERSION 540 /* This implementation */
#ifdef __cplusplus #ifdef __cplusplus
@ -150,6 +150,7 @@ int magic_errno(magic_t);
#define MAGIC_PARAM_ELF_NOTES_MAX 4 #define MAGIC_PARAM_ELF_NOTES_MAX 4
#define MAGIC_PARAM_REGEX_MAX 5 #define MAGIC_PARAM_REGEX_MAX 5
#define MAGIC_PARAM_BYTES_MAX 6 #define MAGIC_PARAM_BYTES_MAX 6
#define MAGIC_PARAM_ENCODING_MAX 7
int magic_setparam(magic_t, int, const void *); int magic_setparam(magic_t, int, const void *);
int magic_getparam(magic_t, int, void *); int magic_getparam(magic_t, int, void *);

View File

@ -1,390 +0,0 @@
#define FILE_VERSION_MAJOR 5
#define patchlevel 37
/*
* Patchlevel file for Ian Darwin's MAGIC command.
* $File: patchlevel.h,v 1.68 2008/03/22 21:39:43 christos Exp $
*
* $Log$
* Revision 2.1 2019/05/30 22:27:12 ab
* Update libmagic to 5.37
*
* $Log$
* Revision 2.1 2018/04/26 22:27:12 ab
* Update libmagic to 5.33
*
* $Log$
* Revision 2.0 2017/10/11 22:27:12 ab
* Update libmagic to 5.31
*
* $Log$
* Revision 1.9 2016/11/24 22:27:12 ab
* Update libmagic to 5.29
*
* $Log$
* Revision 1.9 2016/10/11 22:27:12 ab
* Update libmagic to 5.28
*
* $Log$
* Revision 1.9 2015/03/06 22:27:12 ab
* Update libmagic to 5.2X
*
* $Log$
* Revision 1.8 2014/02/18 22:27:12 ab
* Update libmagic to 5.17
*
* $Log$
* Revision 1.7 2013/03/26 22:27:12 ab
* Update libmagic to 5.14
*
* $Log$
* Revision 1.6 2012/03/26 21:01:37 ab
* Update libmagic to 5.11
*
* Revision 1.5 2012/03/25 13:54:37 ab
* Update libmagic to 5.04
*
* Revision 1.4 2009/05/04 20:52:43 scottmac
* Update libmagic to 5.02
*
* Revision 1.3 2009/03/15 23:02:35 scottmac
* Update fileinfo to libmagic 5.00 and remove dependency on dirent.h on Windows
*
* Revision 1.2 2008/11/02 16:09:27 scottmac
* Update libmagic to 4.26 and add support for v6 of the magic file format.
*
* Revision 1.1 2008/07/11 14:13:50 derick
* - Move lib to libmagic
*
* Revision 1.1 2008/07/11 14:10:50 derick
* - Step one for bundling the libmagic library. Some config.m4 issues left.
*
* Revision 1.69 2008/07/02 15:27:05 christos
* welcome to 4.25
*
* Revision 1.68 2008/03/22 21:39:43 christos
* file 4.24
*
* Revision 1.67 2007/12/28 20:08:40 christos
* welcome to 4.23.
*
* Revision 1.66 2007/12/27 16:38:24 christos
* welcome to 4.22
*
* Revision 1.65 2007/05/24 17:22:27 christos
* Welcome to 4.21
*
* Revision 1.64 2007/03/01 22:14:55 christos
* welcome to 4.20
*
* Revision 1.63 2007/01/12 17:38:28 christos
* Use File id.
*
* Revision 1.62 2006/12/11 21:49:58 christos
* time for 4.19
*
* Revision 1.61 2006/10/31 21:18:09 christos
* bump
*
* Revision 1.60 2006/03/02 22:15:12 christos
* welcome to 4.17
*
* Revision 1.59 2005/10/17 17:15:21 christos
* welcome to 4.16
*
* Revision 1.58 2005/08/18 15:52:56 christos
* welcome to 4.15
*
* Revision 1.57 2005/06/25 15:52:14 christos
* Welcome to 4.14
*
* Revision 1.56 2005/02/09 19:25:13 christos
* Welcome to 4.13
*
* Revision 1.55 2004/11/24 18:57:47 christos
* Re-do the autoconf stuff once more; passes make dist now.
*
* Revision 1.54 2004/11/21 05:52:05 christos
* ready for 4.11
*
* Revision 1.53 2004/07/24 20:40:46 christos
* welcome to 4.10
*
* Revision 1.52 2004/04/07 00:32:25 christos
* welcome to 4.09
*
* Revision 1.51 2004/03/22 21:17:11 christos
* welcome to 4.08.
*
* Revision 1.50 2003/12/23 17:34:04 christos
* 4.07
*
* Revision 1.49 2003/10/15 02:08:27 christos
* welcome to 4.06
*
* Revision 1.48 2003/09/12 19:41:14 christos
* this is 4.04
*
* Revision 1.47 2003/05/23 21:38:21 christos
* welcome to 4.03
*
* Revision 1.46 2003/04/02 18:57:43 christos
* prepare for 4.02
*
* Revision 1.45 2003/03/26 15:37:25 christos
* - Pass lint
* - make NULL in magic_file mean stdin
* - Fix "-" argument to file to pass NULL to magic_file
* - avoid pointer casts by using memcpy
* - rename magic_buf -> magic_buffer
* - keep only the first error
* - manual page: new sentence, new line
* - fix typo in api function (magic_buf -> magic_buffer)
*
* Revision 1.44 2003/03/23 22:23:31 christos
* finish librarification.
*
* Revision 1.43 2003/03/23 21:16:26 christos
* update copyrights.
*
* Revision 1.42 2003/03/23 04:06:05 christos
* Library re-organization
*
* Revision 1.41 2003/02/27 20:53:45 christos
* - fix memory allocation problem (Jeff Johnson)
* - fix stack overflow corruption (David Endler)
* - fixes from NetBSD source (Antti Kantee)
* - magic fixes
*
* Revision 1.40 2003/02/08 18:33:53 christos
* - detect inttypes.h too (Dave Love <d.love@dl.ac.uk>)
* - eliminate unsigned char warnings (Petter Reinholdtsen <pere@hungry.com>)
* - better elf PT_NOTE handling (Nalin Dahyabhai <nalin@redhat.com>)
* - add options to format the output differently
* - much more magic.
*
* Revision 1.39 2002/07/03 18:57:52 christos
* - ansify/c99ize
* - more magic
* - better COMPILE_ONLY support.
* - new magic files.
* - fix solaris compilation problems.
*
* Revision 1.38 2002/05/16 18:45:56 christos
* - pt_note elf additions from NetBSD
* - EMX os specific changes (Alexander Mai)
* - stdint.h detection, acconfig.h fixes (Maciej W. Rozycki, Franz Korntner)
* - regex file additions (Kim Cromie)
* - getopt_long support and misc cleanups (Michael Piefel)
* - many magic fixes and additions
*
* Revision 1.37 2001/09/03 14:44:22 christos
* daylight/tm_isdst detection
* magic fixes
* don't eat the whole file if it has only nulls
*
* Revision 1.36 2001/07/22 21:04:15 christos
* - magic fixes
* - add new operators, pascal strings, UTC date printing, $HOME/.magic
* [from "Tom N Harris" <telliamed@mac.com>]
*
* Revision 1.35 2001/04/24 14:40:25 christos
* - rename magic file sgi to mips and fix it
* - add support for building magic.mgc
* - portability fixes for mmap()
* - try gzip before uncompress, because uncompress sometimes hangs
* - be more conservative about pipe reads and writes
* - many magic fixes
*
* Revision 1.34 2001/03/12 05:05:57 christos
* - new compiled magic format
* - lots of magic additions
*
* Revision 1.33 2000/11/13 00:30:50 christos
* - wordperfect magic fix: freebsd pr 9388
* - more msdos fixes from freebsd pr's 20131 and 20812
* - sas and spss magic [Bruce Foster]
* - mkinstalldirs [John Fremlin]
* - sgi opengl fixes [Michael Pruett]
* - netbsd magic fixes [Ignatios Souvatzis]
* - audio additions [Michael Pruett]
* - fix problem with non ansi RCSID [Andreas Ley]
* - oggs magic [Felix von Leitner]
* - gmon magic [Eugen Dedu]
* - TNEF magic [Joomy]
* - netpbm magic and misc other image stuff [Bryan Henderson]
*
* Revision 1.32 2000/08/05 18:24:18 christos
* Correct indianness detection in elf (Charles Hannum)
* FreeBSD elf core support (Guy Harris)
* Use gzip in systems that don't have uncompress (Anthon van der Neut)
* Internationalization/EBCDIC support (Eric Fisher)
* Many many magic changes
*
* Revision 1.31 2000/05/14 17:58:36 christos
* - new magic for claris files
* - new magic for mathematica and maple files
* - new magic for msvc files
* - new -k flag to keep going matching all possible entries
* - add the word executable on #! magic files, and fix the usage of
* the word script
* - lots of other magic fixes
* - fix typo test -> text
*
* Revision 1.30 2000/04/11 02:41:17 christos
* - add support for mime output (-i)
* - make sure we free memory in case realloc fails
* - magic fixes
*
* Revision 1.29 1999/11/28 20:02:29 christos
* new string/[Bcb] magic from anthon, and adjustments to the magic files to
* use it.
*
* Revision 1.28 1999/10/31 22:11:48 christos
* - add "char" type for compatibility with HP/UX
* - recognize HP/UX syntax &=n etc.
* - include errno.h for CYGWIN
* - conditionalize the S_IS* macros
* - revert the SHT_DYNSYM test that broke the linux stripped binaries test
* - lots of Magdir changes
*
* Revision 1.27 1999/02/14 17:21:41 christos
* Automake support and misc cleanups from Rainer Orth
* Enable reading character and block special files from Dale R. Worley
*
* Revision 1.26 1998/09/12 13:19:39 christos
* - add support for bi-endian indirect offsets (Richard Verhoeven)
* - add recognition for bcpl (Joseph Myers)
* - remove non magic files from Magdir to avoid difficulties building
* on os2 where files are case independent
* - magic fixes.
*
* Revision 1.25 1998/06/27 14:04:04 christos
* OLF patch Guy Harris
* Recognize java/html (debian linux)
* Const poisoning (debian linux)
* More magic!
*
* Revision 1.24 1998/02/15 23:20:38 christos
* Autoconf patch: Felix von Leitner <leitner@math.fu-berlin.de>
* More magic fixes
* Elf64 fixes
*
* Revision 1.23 1997/11/05 16:03:37 christos
* - correct elf prps offset for SunOS-2.5.1 [guy@netapp.com]
* - handle 64 bit time_t's correctly [ewt@redhat.com]
* - new mime style magic [clarosse@netvista.net]
* - new TI calculator magic [rmcguire@freenet.columbus.oh.us]
* - new figlet fonts [obrien@freebsd.org]
* - new cisco magic, and elf fixes [jhawk@bbnplanet.com]
* - -b flag addition, and x86 filesystem magic [vax@linkhead.paranoia.com]
* - s/Mpeg/MPEG, header and elf typo fixes [guy@netapp.com]
* - Windows/NT registry files, audio code [guy@netapp.com]
* - libGrx graphics lib fonts [guy@netapp.com]
* - PNG fixes [guy@netapp.com]
* - more m$ document magic [guy@netapp.com]
* - PPD files [guy@netapp.com]
* - archive magic cleanup [guy@netapp.com]
* - linux kernel magic cleanup [guy@netapp.com]
* - lecter magic [guy@netapp.com]
* - vgetty magic [guy@netapp.com]
* - sniffer additions [guy@netapp.com]
*
* Revision 1.22 1997/01/15 17:23:24 christos
* - add support for elf core files: find the program name under SVR4 [Ken Pizzini]
* - print strings only up to the first carriage return [various]
* - freebsd international ascii support [J Wunsch]
* - magic fixes and additions [Guy Harris]
* - 64 bit fixes [Larry Schwimmer]
* - support for both utime and utimes, but don't restore file access times
* by default [various]
* - \xXX only takes 2 hex digits, not 3.
* - re-implement support for core files [Guy Harris]
*
* Revision 1.21 1996/10/05 18:15:29 christos
* Segregate elf stuff and conditionally enable it with -DBUILTIN_ELF
* More magic fixes
*
* Revision 1.20 1996/06/22 22:15:52 christos
* - support relative offsets of the form >&
* - fix bug with truncating magic strings that contain \n
* - file -f - did not read from stdin as documented
* - support elf file parsing using our own elf support.
* - as always magdir fixes and additions.
*
* Revision 1.19 1995/10/27 23:14:46 christos
* Ability to parse colon separated list of magic files
* New LEGAL.NOTICE
* Various magic file changes
*
* Revision 1.18 1995/05/20 22:09:21 christos
* Passed incorrect argument to eatsize().
* Use %ld and %lx where appropriate.
* Remove unused variables
* ELF support for both big and little endian
* Fixes for small files again.
*
* Revision 1.17 1995/04/28 17:29:13 christos
* - Incorrect nroff detection fix from der Mouse
* - Lost and incorrect magic entries.
* - Added ELF stripped binary detection [in C; ugh]
* - Look for $MAGIC to find the magic file.
* - Eat trailing size specifications from numbers i.e. ignore 10L
* - More fixes for very short files
*
* Revision 1.16 1995/03/25 22:06:45 christos
* - use strtoul() where it exists.
* - fix sign-extend bug
* - try to detect tar archives before nroff files, otherwise
* tar files where the first file starts with a . will not work
*
* Revision 1.15 1995/01/21 21:03:35 christos
* Added CSECTION for the file man page
* Added version flag -v
* Fixed bug with -f input flag (from iorio@violet.berkeley.edu)
* Lots of magic fixes and reorganization...
*
* Revision 1.14 1994/05/03 17:58:23 christos
* changes from mycroft@gnu.ai.mit.edu (Charles Hannum) for unsigned
*
* Revision 1.13 1994/01/21 01:27:01 christos
* Fixed null termination bug from Don Seeley at BSDI in ascmagic.c
*
* Revision 1.12 1993/10/27 20:59:05 christos
* Changed -z flag to understand gzip format too.
* Moved builtin compression detection to a table, and move
* the compress magic entry out of the source.
* Made printing of numbers unsigned, and added the mask to it.
* Changed the buffer size to 8k, because gzip will refuse to
* unzip just a few bytes.
*
* Revision 1.11 1993/09/24 18:49:06 christos
* Fixed small bug in softmagic.c introduced by
* copying the data to be examined out of the input
* buffer. Changed the Makefile to use sed to create
* the correct man pages.
*
* Revision 1.10 1993/09/23 21:56:23 christos
* Passed purify. Fixed indirections. Fixed byte order printing.
* Fixed segmentation faults caused by referencing past the end
* of the magic buffer. Fixed bus errors caused by referencing
* unaligned shorts or longs.
*
* Revision 1.9 1993/03/24 14:23:40 ian
* Batch of minor changes from several contributors.
*
* Revision 1.8 93/02/19 15:01:26 ian
* Numerous changes from Guy Harris too numerous to mention but including
* byte-order independance, fixing "old-style masking", etc. etc. A bugfix
* for broken symlinks from martin@@d255s004.zfe.siemens.de.
*
* Revision 1.7 93/01/05 14:57:27 ian
* Couple of nits picked by Christos (again, thanks).
*
* Revision 1.6 93/01/05 13:51:09 ian
* Lotsa work on the Magic directory.
*
* Revision 1.5 92/09/14 14:54:51 ian
* Fix a tiny null-pointer bug in previous fix for tar archive + uncompress.
*
*/

View File

@ -32,7 +32,7 @@
#include "file.h" #include "file.h"
#ifndef lint #ifndef lint
FILE_RCSID("@(#)$File: softmagic.c,v 1.299 2020/06/07 21:58:01 christos Exp $") FILE_RCSID("@(#)$File: softmagic.c,v 1.309 2021/02/05 22:29:07 christos Exp $")
#endif /* lint */ #endif /* lint */
#include "magic.h" #include "magic.h"
@ -169,6 +169,8 @@ file_fmtcheck(struct magic_set *ms, const char *desc, const char *def,
#define F(a, b, c) ((b)) #define F(a, b, c) ((b))
#endif #endif
/* NOTE: this function has been kept at the state of 5.39 for BC. Revisit
* it when upgrading to 5.41 or above. */
/* /*
* Go through the whole list, stopping if you find a match. Process all * Go through the whole list, stopping if you find a match. Process all
* the continuations of that match before returning. * the continuations of that match before returning.
@ -498,6 +500,28 @@ check_fmt(struct magic_set *ms, const char *fmt)
return rv; return rv;
} }
#if !defined(HAVE_STRNDUP) || defined(__aiws__) || defined(_AIX)
# if defined(__aiws__) || defined(_AIX)
# define strndup aix_strndup /* aix is broken */
# endif
char *strndup(const char *, size_t);
/*
 * Fallback strndup(): return a malloc()ed, NUL-terminated copy of at most
 * n characters of str, or NULL when the allocation fails.
 */
char *
strndup(const char *str, size_t n)
{
	size_t used = 0;
	char *dup;

	/* length of str, capped at n */
	while (used < n && str[used] != '\0')
		used++;

	dup = malloc(used + 1);
	if (dup != NULL) {
		(void)memcpy(dup, str, used);
		dup[used] = '\0';
	}
	return dup;
}
#endif /* HAVE_STRNDUP */
static int static int
varexpand(struct magic_set *ms, char *buf, size_t len, const char *str) varexpand(struct magic_set *ms, char *buf, size_t len, const char *str)
{ {
@ -569,93 +593,58 @@ mprint(struct magic_set *ms, struct magic *m)
else else
desc = ebuf; desc = ebuf;
/*
 * PRINTER(value, format, stype, utype): print one numeric magic value.
 *
 * Expands to statements, not an expression, and relies on ms, m, desc, buf,
 * v and t being in scope at the point of use.  It sign-extends value with
 * file_signextend() into v, then consults check_fmt(ms, desc):
 *   -1: return -1 from the enclosing function;
 *    1: format the number into buf first and print buf through "%s";
 *    otherwise: print the number directly.
 * The number is converted with "%<format>u" and cast to utype when
 * m->flag has UNSIGNED set, else with "%<format>d" and cast to stype.  A
 * failing file_printf() also returns -1 from the enclosing function.
 * Finally t is set to ms->offset + sizeof(stype).
 *
 * The expansion ends in a bare `break', so the `;' written after the
 * invocation completes it and leaves the enclosing switch.
 */
#define PRINTER(value, format, stype, utype) \
v = file_signextend(ms, m, CAST(uint64_t, value)); \
switch (check_fmt(ms, desc)) { \
case -1: \
return -1; \
case 1: \
if (m->flag & UNSIGNED) { \
(void)snprintf(buf, sizeof(buf), "%" format "u", \
CAST(utype, v)); \
} else { \
(void)snprintf(buf, sizeof(buf), "%" format "d", \
CAST(stype, v)); \
} \
if (file_printf(ms, F(ms, desc, "%s"), buf) == -1) \
return -1; \
break; \
default: \
if (m->flag & UNSIGNED) { \
if (file_printf(ms, F(ms, desc, "%" format "u"), \
CAST(utype, v)) == -1) \
return -1; \
} else { \
if (file_printf(ms, F(ms, desc, "%" format "d"), \
CAST(stype, v)) == -1) \
return -1; \
} \
break; \
} \
t = ms->offset + sizeof(stype); \
break
switch (m->type) { switch (m->type) {
case FILE_BYTE: case FILE_BYTE:
v = file_signextend(ms, m, CAST(uint64_t, p->b)); PRINTER(p->b, "", int8_t, uint8_t);
switch (check_fmt(ms, desc)) {
case -1:
return -1;
case 1:
(void)snprintf(buf, sizeof(buf), "%d",
CAST(unsigned char, v));
if (file_printf(ms, F(ms, desc, "%s"), buf) == -1)
return -1;
break;
default:
if (file_printf(ms, F(ms, desc, "%d"),
CAST(unsigned char, v)) == -1)
return -1;
break;
}
t = ms->offset + sizeof(char);
break;
case FILE_SHORT: case FILE_SHORT:
case FILE_BESHORT: case FILE_BESHORT:
case FILE_LESHORT: case FILE_LESHORT:
v = file_signextend(ms, m, CAST(uint64_t, p->h)); PRINTER(p->h, "", int16_t, uint16_t);
switch (check_fmt(ms, desc)) {
case -1:
return -1;
case 1:
(void)snprintf(buf, sizeof(buf), "%u",
CAST(unsigned short, v));
if (file_printf(ms, F(ms, desc, "%s"), buf) == -1)
return -1;
break;
default:
if (file_printf(ms, F(ms, desc, "%u"),
CAST(unsigned short, v)) == -1)
return -1;
break;
}
t = ms->offset + sizeof(short);
break;
case FILE_LONG: case FILE_LONG:
case FILE_BELONG: case FILE_BELONG:
case FILE_LELONG: case FILE_LELONG:
case FILE_MELONG: case FILE_MELONG:
v = file_signextend(ms, m, CAST(uint64_t, p->l)); PRINTER(p->l, "", int32_t, uint32_t);
switch (check_fmt(ms, desc)) {
case -1:
return -1;
case 1:
(void)snprintf(buf, sizeof(buf), "%u",
CAST(uint32_t, v));
if (file_printf(ms, F(ms, desc, "%s"), buf) == -1)
return -1;
break;
default:
if (file_printf(ms, F(ms, desc, "%u"),
CAST(uint32_t, v)) == -1)
return -1;
break;
}
t = ms->offset + sizeof(int32_t);
break; break;
case FILE_QUAD: case FILE_QUAD:
case FILE_BEQUAD: case FILE_BEQUAD:
case FILE_LEQUAD: case FILE_LEQUAD:
case FILE_OFFSET: case FILE_OFFSET:
v = file_signextend(ms, m, p->q); PRINTER(p->q, INT64_T_FORMAT, long long, unsigned long long);
switch (check_fmt(ms, desc)) {
case -1:
return -1;
case 1:
(void)snprintf(buf, sizeof(buf), "%" INT64_T_FORMAT "u",
CAST(unsigned long long, v));
if (file_printf(ms, F(ms, desc, "%s"), buf) == -1)
return -1;
break;
default:
if (file_printf(ms, F(ms, desc, "%" INT64_T_FORMAT "u"),
CAST(unsigned long long, v)) == -1)
return -1;
break;
}
t = ms->offset + sizeof(int64_t);
break; break;
case FILE_STRING: case FILE_STRING:
@ -678,19 +667,9 @@ mprint(struct magic_set *ms, struct magic *m)
if (*m->value.s == '\0') if (*m->value.s == '\0')
str[strcspn(str, "\r\n")] = '\0'; str[strcspn(str, "\r\n")] = '\0';
if (m->str_flags & STRING_TRIM) { if (m->str_flags & STRING_TRIM)
char *last; str = file_strtrim(str);
while (isspace(CAST(unsigned char, *str)))
str++;
last = str;
while (*last)
last++;
--last;
while (isspace(CAST(unsigned char, *last)))
last--;
*++last = '\0';
}
if (file_printf(ms, F(ms, desc, "%s"), if (file_printf(ms, F(ms, desc, "%s"),
file_printable(sbuf, sizeof(sbuf), str, file_printable(sbuf, sizeof(sbuf), str,
sizeof(p->s) - (str - p->s))) == -1) sizeof(p->s) - (str - p->s))) == -1)
@ -795,14 +774,20 @@ mprint(struct magic_set *ms, struct magic *m)
case FILE_SEARCH: case FILE_SEARCH:
case FILE_REGEX: { case FILE_REGEX: {
char *cp; char *cp, *scp;
int rval; int rval;
cp = estrndup(RCAST(const char *, ms->search.s), cp = strndup(RCAST(const char *, ms->search.s),
ms->search.rm_len); ms->search.rm_len);
if (cp == NULL) {
file_oomem(ms, ms->search.rm_len);
return -1;
}
scp = (m->str_flags & STRING_TRIM) ? file_strtrim(cp) : cp;
rval = file_printf(ms, F(ms, desc, "%s"), rval = file_printf(ms, F(ms, desc, "%s"),
file_printable(sbuf, sizeof(sbuf), cp, ms->search.rm_len)); file_printable(sbuf, sizeof(sbuf), scp, ms->search.rm_len));
efree(cp); free(cp);
if (rval == -1) if (rval == -1)
return -1; return -1;
@ -955,6 +940,7 @@ moffset(struct magic_set *ms, struct magic *m, const struct buffer *b,
case FILE_DEFAULT: case FILE_DEFAULT:
case FILE_INDIRECT: case FILE_INDIRECT:
case FILE_OFFSET: case FILE_OFFSET:
case FILE_USE:
o = ms->offset; o = ms->offset;
break; break;
@ -1541,6 +1527,28 @@ normal:
return 0; return 0;
} }
/*
 * Save the current continuation state of ms into *c and give ms a private
 * copy of the level array to work on.
 *
 * On success, *c holds the previous ms->c (including the original li
 * pointer), ms->c.li points at a freshly malloc'ed duplicate of c->len
 * entries, and 0 is returned.  If the allocation fails, ms->c is left
 * exactly as it was and -1 is returned; *c has still been overwritten
 * with a copy of ms->c.
 */
private int
save_cont(struct magic_set *ms, struct cont *c)
{
	struct level_info *dup;
	size_t nbytes;

	/* Hand the live state to the caller before touching anything. */
	*c = ms->c;
	nbytes = c->len * sizeof(*c->li);

	dup = CAST(struct level_info *, malloc(nbytes));
	if (dup == NULL)
		return -1;

	/* Only swap in the duplicate once it is fully populated. */
	memcpy(dup, c->li, nbytes);
	ms->c.li = dup;
	return 0;
}
/*
 * Put the continuation state stored in *c back into ms, releasing the
 * level array that ms was using in the meantime with free().
 */
private void
restore_cont(struct magic_set *ms, struct cont *c)
{
	struct level_info *scratch = ms->c.li;

	ms->c = *c;
	free(scratch);
}
private int private int
mget(struct magic_set *ms, struct magic *m, const struct buffer *b, mget(struct magic_set *ms, struct magic *m, const struct buffer *b,
const unsigned char *s, size_t nbytes, size_t o, unsigned int cont_level, const unsigned char *s, size_t nbytes, size_t o, unsigned int cont_level,
@ -1548,14 +1556,15 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b,
int *printed_something, int *need_separator, int *returnval, int *printed_something, int *need_separator, int *returnval,
int *found_match) int *found_match)
{ {
uint32_t offset = ms->offset; uint32_t eoffset, offset = ms->offset;
struct buffer bb; struct buffer bb;
intmax_t lhs; intmax_t lhs;
file_pushbuf_t *pb; file_pushbuf_t *pb;
int rv, oneed_separator, in_type; int rv, oneed_separator, in_type, nfound_match;
char *rbuf; char *rbuf;
union VALUETYPE *p = &ms->ms_value; union VALUETYPE *p = &ms->ms_value;
struct mlist ml; struct mlist ml;
struct cont c;
if (*indir_count >= ms->indir_max) { if (*indir_count >= ms->indir_max) {
file_error(ms, 0, "indirect count (%hu) exceeded", file_error(ms, 0, "indirect count (%hu) exceeded",
@ -1836,7 +1845,8 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b,
if (rv == 1) { if (rv == 1) {
if ((ms->flags & MAGIC_NODESC) == 0 && if ((ms->flags & MAGIC_NODESC) == 0 &&
file_printf(ms, F(ms, m->desc, "%u"), offset) == -1) { file_printf(ms, F(ms, m->desc, "%u"), offset) == -1)
{
if (rbuf) efree(rbuf); if (rbuf) efree(rbuf);
return -1; return -1;
} }
@ -1860,16 +1870,32 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b,
file_error(ms, 0, "cannot find entry `%s'", rbuf); file_error(ms, 0, "cannot find entry `%s'", rbuf);
return -1; return -1;
} }
(*name_count)++; if (save_cont(ms, &c) == -1) {
file_error(ms, errno, "can't allocate continuation");
return -1;
}
oneed_separator = *need_separator; oneed_separator = *need_separator;
if (m->flag & NOSPACE) if (m->flag & NOSPACE)
*need_separator = 0; *need_separator = 0;
nfound_match = 0;
(*name_count)++;
eoffset = ms->eoffset;
rv = match(ms, ml.magic, ml.nmagic, b, offset + o, rv = match(ms, ml.magic, ml.nmagic, b, offset + o,
mode, text, flip, indir_count, name_count, mode, text, flip, indir_count, name_count,
printed_something, need_separator, returnval, found_match); printed_something, need_separator, returnval,
&nfound_match);
ms->ms_value.q = nfound_match;
(*name_count)--; (*name_count)--;
*found_match |= nfound_match;
restore_cont(ms, &c);
if (rv != 1) if (rv != 1)
*need_separator = oneed_separator; *need_separator = oneed_separator;
ms->offset = offset;
ms->eoffset = eoffset;
return rv; return rv;
case FILE_NAME: case FILE_NAME:
@ -1934,13 +1960,10 @@ file_strncmp(const char *s1, const char *s2, size_t len, size_t maxlen,
} }
else if ((flags & STRING_COMPACT_WHITESPACE) && else if ((flags & STRING_COMPACT_WHITESPACE) &&
isspace(*a)) { isspace(*a)) {
/* XXX Dirty. The data and the pattern is what is causing this.
Revert _i for the next port and see if it still matters. */
uint32_t _i = 0;
a++; a++;
if (isspace(*b++)) { if (isspace(*b++)) {
if (!isspace(*a)) if (!isspace(*a))
while (EXPECTED(_i++ < 2048) && b < eb && isspace(*b)) while (b < eb && isspace(*b))
b++; b++;
} }
else { else {
@ -2282,9 +2305,10 @@ error_out:
} }
break; break;
} }
case FILE_INDIRECT:
case FILE_USE: case FILE_USE:
return ms->ms_value.q != 0;
case FILE_NAME: case FILE_NAME:
case FILE_INDIRECT:
return 1; return 1;
case FILE_DER: case FILE_DER:
matched = der_cmp(ms, m); matched = der_cmp(ms, m);

View File

@ -1,99 +1,22 @@
diff -u magic.orig/Magdir/images magic/Magdir/images diff -ur Magdir.orig/mail.news Magdir/mail.news
--- magic.orig/Magdir/images 2020-05-31 12:34:40.000000000 +0200 --- Magdir.orig/mail.news 2021-03-31 01:47:28.000000000 +0200
+++ magic/Magdir/images 2020-07-05 20:00:41.664783368 +0200 +++ Magdir/mail.news 2021-04-05 19:41:55.168556972 +0200
@@ -1,6 +1,6 @@ @@ -1,5 +1,5 @@
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
-# $File: images,v 1.181 2020/05/30 23:49:03 christos Exp $ -# $File: mail.news,v 1.26 2021/03/21 14:37:03 christos Exp $
+# $File: images,v 1.183 2020/06/26 17:08:32 christos Exp $ +# $File: mail.news,v 1.27 2021/04/05 16:36:14 christos Exp $
# images: file(1) magic for image formats (see also "iff", and "c-lang" for # mail.news: file(1) magic for mail and news
# XPM bitmaps)
# #
@@ -32,22 +32,22 @@ # Unfortunately, saved netnews also has From line added in some news software.
# Prevent conflicts with CRI ADX. @@ -81,4 +81,4 @@
>(2.S-2) belong !0x28632943 # File format spec: https://wiki.dovecot.org/Design/Dcrypt/#File_format
# skip more garbage like *.iso by looking for positive image type # From: Stephen Gildea
->>2 ubyte >0 0 string CRYPTED\003\007 Dovecot encrypted message
+>2 ubyte >0 ->9 byte xu \b, dcrypt version %d
# skip some compiled terminfo like xterm+tmux by looking for image type less equal 33 +>9 byte x \b, dcrypt version %d
->>>2 ubyte <34 diff -ur Magdir.orig/rpm Magdir/rpm
+>>2 ubyte <34 --- Magdir.orig/rpm 2021-02-23 01:49:24.000000000 +0100
# skip arches.3200 , Finder.Root , Slp.1 by looking for low pixel depth 1 8 15 16 24 32 +++ Magdir/rpm 2021-04-05 19:40:55.080911893 +0200
->>>>16 ubyte 1
->>>>>0 use tga-image
->>>>16 ubyte 8
->>>>>0 use tga-image
->>>>16 ubyte 15
->>>>>0 use tga-image
->>>>16 ubyte 16
->>>>>0 use tga-image
->>>>16 ubyte 24
->>>>>0 use tga-image
->>>>16 ubyte 32
->>>>>0 use tga-image
+>>>16 ubyte 1
+>>>>0 use tga-image
+>>>16 ubyte 8
+>>>>0 use tga-image
+>>>16 ubyte 15
+>>>>0 use tga-image
+>>>16 ubyte 16
+>>>>0 use tga-image
+>>>16 ubyte 24
+>>>>0 use tga-image
+>>>16 ubyte 32
+>>>>0 use tga-image
# display tga bitmap image information
0 name tga-image
>2 ubyte <34 Targa image data
@@ -615,7 +615,7 @@
0 leshort 40
# skip bad samples like GAME by looking for valid number of color planes
>12 uleshort 1 Device independent bitmap graphic
-!:mime image/bmp
+!:mime image/x-ms-bmp
!:apple ????BMPp
!:ext dib
>>4 lelong x \b, %d x
@@ -641,7 +641,7 @@
>>18 leshort x \b, %d x
>>20 leshort x %d
>14 leshort 64 PC bitmap, OS/2 2.x format
-!:mime image/bmp
+!:mime image/x-ms-bmp
!:apple ????BMPp
!:ext bmp
# image width and height fields are unsigned integers for OS/2
@@ -662,7 +662,7 @@
#>>(10.l) ubequad !0 \b, bits 0x%16.16llx
# BITMAPV2INFOHEADER adds RGB bit masks
>14 leshort 52 PC bitmap, Adobe Photoshop
-!:mime image/bmp
+!:mime image/x-ms-bmp
!:apple ????BMPp
!:ext bmp
>>18 lelong x \b, %d x
@@ -670,7 +670,7 @@
>>28 leshort x %d
# BITMAPV3INFOHEADER adds alpha channel bit mask
>14 leshort 56 PC bitmap, Adobe Photoshop with alpha channel mask
-!:mime image/bmp
+!:mime image/x-ms-bmp
!:apple ????BMPp
!:ext bmp
>>18 lelong x \b, %d x
@@ -679,7 +679,7 @@
>14 leshort 40
# jump 4 bytes before end of file/header to skip fmt-116-signature-id-118.dib
>>(2.l-4) ulong x PC bitmap, Windows 3.x format
-!:mime image/bmp
+!:mime image/x-ms-bmp
!:apple ????BMPp
>>>18 lelong x \b, %d x
>>>22 lelong x %d
diff -u magic.orig/Magdir/rpm magic/Magdir/rpm
--- magic.orig/Magdir/rpm 2019-02-22 14:06:34.000000000 +0100
+++ magic/Magdir/rpm 2020-07-05 19:38:02.720419674 +0200
@@ -29,6 +29,7 @@ @@ -29,6 +29,7 @@
>>8 beshort 17 SuperH >>8 beshort 17 SuperH
>>8 beshort 18 Xtensa >>8 beshort 18 Xtensa
@ -102,9 +25,9 @@ diff -u magic.orig/Magdir/rpm magic/Magdir/rpm
#delta RPM Daniel Novotny (dnovotny@redhat.com) #delta RPM Daniel Novotny (dnovotny@redhat.com)
0 string drpm Delta RPM 0 string drpm Delta RPM
diff -u magic.orig/Magdir/securitycerts magic/Magdir/securitycerts diff -ur Magdir.orig/securitycerts Magdir/securitycerts
--- magic.orig/Magdir/securitycerts 2019-02-22 14:06:34.000000000 +0100 --- Magdir.orig/securitycerts 2021-02-23 01:49:24.000000000 +0100
+++ magic/Magdir/securitycerts 2020-07-05 19:38:02.720419674 +0200 +++ Magdir/securitycerts 2021-04-05 19:40:55.080911893 +0200
@@ -4,3 +4,5 @@ @@ -4,3 +4,5 @@
0 search/1 -----BEGIN\ CERTIFICATE------ RFC1421 Security Certificate text 0 search/1 -----BEGIN\ CERTIFICATE------ RFC1421 Security Certificate text
0 search/1 -----BEGIN\ NEW\ CERTIFICATE RFC1421 Security Certificate Signing Request text 0 search/1 -----BEGIN\ NEW\ CERTIFICATE RFC1421 Security Certificate Signing Request text

View File

@ -15,4 +15,4 @@ $type = $finfo->buffer($string);
var_dump($type); var_dump($type);
?> ?>
--EXPECT-- --EXPECT--
string(60) "ASCII text, with very long lines, with CRLF line terminators" string(66) "ASCII text, with very long lines (617), with CRLF line terminators"

View File

@ -23,4 +23,4 @@ var_dump($type);
?> ?>
--EXPECT-- --EXPECT--
string(60) "ASCII text, with very long lines, with CRLF line terminators" string(67) "ASCII text, with very long lines (8191), with CRLF line terminators"

View File

@ -24,5 +24,5 @@ try {
*** Testing finfo_file() : basic functionality *** *** Testing finfo_file() : basic functionality ***
string(28) "text/x-php; charset=us-ascii" string(28) "text/x-php; charset=us-ascii"
string(22) "PHP script, ASCII text" string(22) "PHP script, ASCII text"
string(25) "text/plain; charset=utf-8" string(28) "text/plain; charset=us-ascii"
finfo_file(): Argument #1 ($finfo) must not contain any null bytes finfo_file(): Argument #1 ($finfo) must not contain any null bytes

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff