fileinfo: Port libmagic 5.40

Signed-off-by: Anatol Belski <ab@php.net>
2024-11-30 21:35:36 +08:00 · 2021-04-01 20:15:45 +02:00 · 2021-04-01 20:15:45 +02:00 · 3b9173dc8f
commit 3b9173dc8f
parent 22019a1edd
21 changed files with 174944 additions and 148884 deletions
--- a/ext/fileinfo/data_file.c
+++ b/ext/fileinfo/data_file.c
--- a/ext/fileinfo/libmagic.patch
+++ b/ext/fileinfo/libmagic.patch
--- a/ext/fileinfo/libmagic/apprentice.c
+++ b/ext/fileinfo/libmagic/apprentice.c
@ -34,11 +34,10 @@
 #include "file.h"
 #ifndef	lint
-FILE_RCSID("@(#)$File: apprentice.c,v 1.297 2020/05/09 18:57:15 christos Exp $")
+FILE_RCSID("@(#)$File: apprentice.c,v 1.301 2021/02/23 00:51:11 christos Exp $")
 #endif	/* lint */
 #include "magic.h"
 #include "patchlevel.h"
 #include <stdlib.h>
 #if defined(__hpux) && !defined(HAVE_STRTOULL)
@ -536,6 +535,7 @@ file_ms_alloc(int flags)
 	ms->elf_notes_max = FILE_ELF_NOTES_MAX;
 	ms->regex_max = FILE_REGEX_MAX;
 	ms->bytes_max = FILE_BYTES_MAX;
 	ms->encoding_max = FILE_ENCODING_MAX;
 	return ms;
 free:
 	efree(ms);
@ -1416,7 +1416,10 @@ apprentice_load(struct magic_set *ms, const char *fn, int action)
 		 */
 		set_last_default(ms, mset[j].me, mset[j].count);
-		/* coalesce per file arrays into a single one */
+		/* coalesce per file arrays into a single one, if needed */
 		if (mset[j].count == 0)
 			continue;
 		if (coalesce_entries(ms, mset[j].me, mset[j].count,
 		    &map->magic[j], &map->nmagic[j]) == -1) {
 			errs++;
@ -2086,6 +2089,13 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line,
 		return -1;
 	}
 	if (m->type == FILE_NAME && cont_level != 0) {
 		if (ms->flags & MAGIC_CHECK)
 			file_magwarn(ms, "`name%s' entries can only be "
 			    "declared at top level", l);
 		return -1;
 	}
 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
 	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
@ -2699,7 +2709,7 @@ getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
 		ull = CAST(uint64_t, strtoull(*p, &ep, 0));
 		m->value.q = file_signextend(ms, m, ull);
 		if (*p == ep) {
-			file_magwarn(ms, "Unparseable number `%s'", *p);
+			file_magwarn(ms, "Unparsable number `%s'", *p);
 		} else {
 			size_t ts = typesize(m->type);
 			uint64_t x;
@ -3101,8 +3111,8 @@ internal_loaded:
 	else
 		version = ptr[1];
 	if (version != VERSIONNO) {
-		file_error(ms, 0, "File %d.%d supports only version %d magic "
+		file_error(ms, 0, "File %d supports only version %d magic "
-		    "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
+		    "files. `%s' is version %d", MAGIC_VERSION,
 		    VERSIONNO, dbname, version);
 		goto error;
 	}
--- a/ext/fileinfo/libmagic/ascmagic.c
+++ b/ext/fileinfo/libmagic/ascmagic.c
@ -35,7 +35,7 @@
 #include "file.h"
 #ifndef	lint
-FILE_RCSID("@(#)$File: ascmagic.c,v 1.107 2020/06/08 19:58:36 christos Exp $")
+FILE_RCSID("@(#)$File: ascmagic.c,v 1.109 2021/02/05 23:01:40 christos Exp $")
 #endif	/* lint */
 #include "magic.h"
@ -50,7 +50,8 @@ FILE_RCSID("@(#)$File: ascmagic.c,v 1.107 2020/06/08 19:58:36 christos Exp $")
 #define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
 		  || (x) == 0x85 || (x) == '\f')
-private unsigned char *encode_utf8(unsigned char *, size_t, unicodechar *, size_t);
+private unsigned char *encode_utf8(unsigned char *, size_t, file_unichar_t *,
    size_t);
 private size_t trim_nuls(const unsigned char *, size_t);
 /*
@ -69,7 +70,7 @@ trim_nuls(const unsigned char *buf, size_t nbytes)
 protected int
 file_ascmagic(struct magic_set *ms, const struct buffer *b, int text)
 {
-	unicodechar *ubuf = NULL;
+	file_unichar_t *ubuf = NULL;
 	size_t ulen = 0;
 	int rv = 1;
 	struct buffer bb;
@ -101,9 +102,9 @@ file_ascmagic(struct magic_set *ms, const struct buffer *b, int text)
 }
 protected int
-file_ascmagic_with_encoding(struct magic_set *ms,
+file_ascmagic_with_encoding(struct magic_set *ms, const struct buffer *b,
-    const struct buffer *b, unicodechar *ubuf, size_t ulen, const char *code,
+    file_unichar_t *ubuf, size_t ulen, const char *code, const char *type,
-    const char *type, int text)
+    int text)
 {
 	struct buffer bb;
 	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
@ -127,7 +128,7 @@ file_ascmagic_with_encoding(struct magic_set *ms,
 	int executable = 0;
 	size_t last_line_end = CAST(size_t, -1);
-	int has_long_lines = 0;
+	size_t has_long_lines = 0;
 	nbytes = trim_nuls(buf, nbytes);
@ -190,8 +191,11 @@ file_ascmagic_with_encoding(struct magic_set *ms,
 		}
 		/* If this line is _longer_ than MAXLINELEN, remember it. */
-		if (i > last_line_end + MAXLINELEN)
+		if (i > last_line_end + MAXLINELEN) {
-			has_long_lines = 1;
+			size_t ll = i - last_line_end;
 			if (ll > has_long_lines)
 				has_long_lines = ll;
 		}
 		if (ubuf[i] == '\033')
 			has_escapes = 1;
@ -269,7 +273,8 @@ file_ascmagic_with_encoding(struct magic_set *ms,
 				goto done;
 		if (has_long_lines)
-			if (file_printf(ms, ", with very long lines") == -1)
+			if (file_printf(ms, ", with very long lines (%zu)",
 			    has_long_lines) == -1)
 				goto done;
 		/*
@ -281,7 +286,8 @@ file_ascmagic_with_encoding(struct magic_set *ms,
 			if (file_printf(ms, ", with") == -1)
 				goto done;
-			if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
+			if (n_crlf == 0 && n_cr == 0 &&
 			    n_nel == 0 && n_lf == 0) {
 				if (file_printf(ms, " no") == -1)
 					goto done;
 			} else {
@ -335,7 +341,7 @@ done:
 * after end of string, or NULL if an invalid character is found.
 */
 private unsigned char *
-encode_utf8(unsigned char *buf, size_t len, unicodechar *ubuf, size_t ulen)
+encode_utf8(unsigned char *buf, size_t len, file_unichar_t *ubuf, size_t ulen)
 {
 	size_t i;
 	unsigned char *end = buf + len;
@ -345,43 +351,45 @@ encode_utf8(unsigned char *buf, size_t len, unicodechar *ubuf, size_t ulen)
 			if (end - buf < 1)
 				return NULL;
 			*buf++ = CAST(unsigned char, ubuf[i]);
-		} else if (ubuf[i] <= 0x7ff) {
+			continue;
 		} 
 		if (ubuf[i] <= 0x7ff) {
 			if (end - buf < 2)
 				return NULL;
 			*buf++ = CAST(unsigned char, (ubuf[i] >> 6) + 0xc0);
-			*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
+			goto out1;
-		} else if (ubuf[i] <= 0xffff) {
+		}
 		if (ubuf[i] <= 0xffff) {
 			if (end - buf < 3)
 				return NULL;
 			*buf++ = CAST(unsigned char, (ubuf[i] >> 12) + 0xe0);
-			*buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80);
+			goto out2;
-			*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
+		}
-		} else if (ubuf[i] <= 0x1fffff) {
+		if (ubuf[i] <= 0x1fffff) {
 			if (end - buf < 4)
 				return NULL;
 			*buf++ = CAST(unsigned char, (ubuf[i] >> 18) + 0xf0);
-			*buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
+			goto out3;
-			*buf++ = CAST(unsigned char, ((ubuf[i] >>  6) & 0x3f) + 0x80);
+		}
-			*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
+		if (ubuf[i] <= 0x3ffffff) {
 		} else if (ubuf[i] <= 0x3ffffff) {
 			if (end - buf < 5)
 				return NULL;
 			*buf++ = CAST(unsigned char, (ubuf[i] >> 24) + 0xf8);
-			*buf++ = CAST(unsigned char, ((ubuf[i] >> 18) & 0x3f) + 0x80);
+			goto out4;
-			*buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
+		} 
-			*buf++ = CAST(unsigned char, ((ubuf[i] >>  6) & 0x3f) + 0x80);
+		if (ubuf[i] <= 0x7fffffff) {
 			*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
 		} else if (ubuf[i] <= 0x7fffffff) {
 			if (end - buf < 6)
 				return NULL;
 			*buf++ = CAST(unsigned char, (ubuf[i] >> 30) + 0xfc);
-			*buf++ = CAST(unsigned char, ((ubuf[i] >> 24) & 0x3f) + 0x80);
+			goto out5;
-			*buf++ = CAST(unsigned char, ((ubuf[i] >> 18) & 0x3f) + 0x80);
+		} 
-			*buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
+		/* Invalid character */
-			*buf++ = CAST(unsigned char, ((ubuf[i] >>  6) & 0x3f) + 0x80);
+		return NULL;
-			*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
+	out5:	*buf++ = CAST(unsigned char, ((ubuf[i] >> 24) & 0x3f) + 0x80);
-		} else /* Invalid character */
+	out4:	*buf++ = CAST(unsigned char, ((ubuf[i] >> 18) & 0x3f) + 0x80);
-			return NULL;
+	out3:	*buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
 	out2:	*buf++ = CAST(unsigned char, ((ubuf[i] >>  6) & 0x3f) + 0x80);
 	out1:	*buf++ = CAST(unsigned char, ((ubuf[i] >>  0) & 0x3f) + 0x80);
 	}
 	return buf;
--- a/ext/fileinfo/libmagic/compress.c
+++ b/ext/fileinfo/libmagic/compress.c
@ -35,7 +35,7 @@
 #include "file.h"
 #ifndef lint
-FILE_RCSID("@(#)$File: compress.c,v 1.127 2020/05/31 00:11:06 christos Exp $")
+FILE_RCSID("@(#)$File: compress.c,v 1.129 2020/12/08 21:26:00 christos Exp $")
 #endif
 #include "magic.h"
@ -72,7 +72,7 @@ typedef void (*sig_t)(int);
 #include <bzlib.h>
 #endif
-#if defined(HAVE_XZLIB_H) && defined(XZLIBSUPPORT)
+#if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
 #define BUILTIN_XZLIB
 #include <lzma.h>
 #endif
@ -847,8 +847,23 @@ uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
 	for (i = 0; i < __arraycount(fdp); i++)
 		fdp[i][0] = fdp[i][1] = -1;
-	if ((fd == -1 && pipe(fdp[STDIN_FILENO]) == -1) ||
+	/*
-	    pipe(fdp[STDOUT_FILENO]) == -1 || pipe(fdp[STDERR_FILENO]) == -1) {
+	 * There are multithreaded users who run magic_file()
 	 * from dozens of threads. If two parallel magic_file() calls
 	 * analyze two large compressed files, both will spawn
 	 * an uncompressing child here, which writes out uncompressed data.
 	 * We read some portion, then close the pipe, then waitpid() the child.
 	 * If uncompressed data is larger, child should get EPIPE and exit.
 	 * However, with *parallel* calls OTHER child may unintentionally
 	 * inherit pipe fds, thus keeping pipe open and making writes in
 	 * our child block instead of failing with EPIPE!
 	 * (For the bug to occur, two threads must mutually inherit their pipes,
 	 * and both must have large outputs. Thus it happens not that often).
 	 * To avoid this, be sure to create pipes with O_CLOEXEC.
 	 */
 	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
 	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
 	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
 		closep(fdp[STDIN_FILENO]);
 		closep(fdp[STDOUT_FILENO]);
 		return makeerror(newch, n, "Cannot create pipe, %s",
@ -879,16 +894,20 @@ uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
 			if (fdp[STDIN_FILENO][1] > 2)
 				(void) close(fdp[STDIN_FILENO][1]);
 		}
 		file_clear_closexec(STDIN_FILENO);
 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
 		if (copydesc(STDOUT_FILENO, fdp[STDOUT_FILENO][1]))
 			(void) close(fdp[STDOUT_FILENO][1]);
 		if (fdp[STDOUT_FILENO][0] > 2)
 			(void) close(fdp[STDOUT_FILENO][0]);
 		file_clear_closexec(STDOUT_FILENO);
 		if (copydesc(STDERR_FILENO, fdp[STDERR_FILENO][1]))
 			(void) close(fdp[STDERR_FILENO][1]);
 		if (fdp[STDERR_FILENO][0] > 2)
 			(void) close(fdp[STDERR_FILENO][0]);
 		file_clear_closexec(STDERR_FILENO);
 		(void)execvp(compr[method].argv[0],
 		    RCAST(char *const *, RCAST(intptr_t, compr[method].argv)));
--- a/ext/fileinfo/libmagic/config.h
+++ b/ext/fileinfo/libmagic/config.h
@ -1 +0,0 @@
 #include "php.h"
--- a/ext/fileinfo/libmagic/der.c
+++ b/ext/fileinfo/libmagic/der.c
@ -35,7 +35,7 @@
 #include "file.h"
 #ifndef lint
-FILE_RCSID("@(#)$File: der.c,v 1.20 2020/06/07 19:10:37 christos Exp $")
+FILE_RCSID("@(#)$File: der.c,v 1.21 2020/06/15 00:58:10 christos Exp $")
 #endif
 #else
 #define SIZE_T_FORMAT "z"
@ -249,7 +249,6 @@ der_data(char *buf, size_t blen, uint32_t tag, const void *q, uint32_t len)
 		return snprintf(buf, blen,
 		    "20%c%c-%c%c-%c%c %c%c:%c%c:%c%c GMT", d[0], d[1], d[2],
 		    d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11]);
 		break;
 	default:
 		break;
 	}
--- a/ext/fileinfo/libmagic/encoding.c
+++ b/ext/fileinfo/libmagic/encoding.c
@ -35,7 +35,7 @@
 #include "file.h"
 #ifndef	lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.21 2019/06/08 20:49:14 christos Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.27 2021/02/05 21:33:49 christos Exp $")
 #endif	/* lint */
 #include "magic.h"
@ -43,14 +43,20 @@ FILE_RCSID("@(#)$File: encoding.c,v 1.21 2019/06/08 20:49:14 christos Exp $")
 #include <stdlib.h>
-private int looks_ascii(const unsigned char *, size_t, unicodechar *, size_t *);
+private int looks_ascii(const unsigned char *, size_t, file_unichar_t *,
-private int looks_utf8_with_BOM(const unsigned char *, size_t, unicodechar *,
+    size_t *);
 private int looks_utf8_with_BOM(const unsigned char *, size_t, file_unichar_t *,
    size_t *);
 private int looks_utf7(const unsigned char *, size_t, file_unichar_t *,
    size_t *);
 private int looks_ucs16(const unsigned char *, size_t, file_unichar_t *,
    size_t *);
 private int looks_ucs32(const unsigned char *, size_t, file_unichar_t *,
    size_t *);
 private int looks_latin1(const unsigned char *, size_t, file_unichar_t *,
    size_t *);
 private int looks_extended(const unsigned char *, size_t, file_unichar_t *,
    size_t *);
 private int looks_utf7(const unsigned char *, size_t, unicodechar *, size_t *);
 private int looks_ucs16(const unsigned char *, size_t, unicodechar *, size_t *);
 private int looks_ucs32(const unsigned char *, size_t, unicodechar *, size_t *);
 private int looks_latin1(const unsigned char *, size_t, unicodechar *, size_t *);
 private int looks_extended(const unsigned char *, size_t, unicodechar *, size_t *);
 private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
 #ifdef DEBUG_ENCODING
@ -62,19 +68,20 @@ private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
 /*
 * Try to determine whether text is in some character code we can
 * identify.  Each of these tests, if it succeeds, will leave
- * the text converted into one-unicodechar-per-character Unicode in
+ * the text converted into one-file_unichar_t-per-character Unicode in
 * ubuf, and the number of characters converted in ulen.
 */
 protected int
-file_encoding(struct magic_set *ms, const struct buffer *b, unicodechar **ubuf,
+file_encoding(struct magic_set *ms, const struct buffer *b,
-    size_t *ulen, const char **code, const char **code_mime, const char **type)
+    file_unichar_t **ubuf, size_t *ulen, const char **code,
    const char **code_mime, const char **type)
 {
 	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
 	size_t nbytes = b->flen;
 	size_t mlen;
 	int rv = 1, ucs_type;
 	unsigned char *nbuf = NULL;
-	unicodechar *udefbuf;
+	file_unichar_t *udefbuf;
 	size_t udeflen;
 	if (ubuf == NULL)
@ -87,8 +94,12 @@ file_encoding(struct magic_set *ms, const struct buffer *b, unicodechar **ubuf,
 	*code = "unknown";
 	*code_mime = "binary";
 	if (nbytes > ms->encoding_max)
 		nbytes = ms->encoding_max;
 	mlen = (nbytes + 1) * sizeof((*ubuf)[0]);
-	if ((*ubuf = CAST(unicodechar *, ecalloc(CAST(size_t, 1), mlen))) == NULL) {
+	*ubuf = CAST(file_unichar_t *, ecalloc(CAST(size_t, 1), mlen));
 	if (*ubuf == NULL) {
 		file_oomem(ms, mlen);
 		goto done;
 	}
@ -102,7 +113,7 @@ file_encoding(struct magic_set *ms, const struct buffer *b, unicodechar **ubuf,
 	if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
 		if (looks_utf7(buf, nbytes, *ubuf, ulen) > 0) {
 			DPRINTF(("utf-7 %" SIZE_T_FORMAT "u\n", *ulen));
-			*code = "UTF-7 Unicode";
+			*code = "Unicode text, UTF-7";
 			*code_mime = "utf-7";
 		} else {
 			DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
@ -111,27 +122,27 @@ file_encoding(struct magic_set *ms, const struct buffer *b, unicodechar **ubuf,
 		}
 	} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
 		DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen));
-		*code = "UTF-8 Unicode (with BOM)";
+		*code = "Unicode text, UTF-8 (with BOM)";
 		*code_mime = "utf-8";
 	} else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
 		DPRINTF(("utf8 %" SIZE_T_FORMAT "u\n", *ulen));
-		*code = "UTF-8 Unicode";
+		*code = "Unicode text, UTF-8";
 		*code_mime = "utf-8";
 	} else if ((ucs_type = looks_ucs32(buf, nbytes, *ubuf, ulen)) != 0) {
 		if (ucs_type == 1) {
-			*code = "Little-endian UTF-32 Unicode";
+			*code = "Unicode text, UTF-32, little-endian";
 			*code_mime = "utf-32le";
 		} else {
-			*code = "Big-endian UTF-32 Unicode";
+			*code = "Unicode text, UTF-32, big-endian";
 			*code_mime = "utf-32be";
 		}
 		DPRINTF(("ucs32 %" SIZE_T_FORMAT "u\n", *ulen));
 	} else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
 		if (ucs_type == 1) {
-			*code = "Little-endian UTF-16 Unicode";
+			*code = "Unicode text, UTF-16, little-endian";
 			*code_mime = "utf-16le";
 		} else {
-			*code = "Big-endian UTF-16 Unicode";
+			*code = "Unicode text, UTF-16, big-endian";
 			*code_mime = "utf-16be";
 		}
 		DPRINTF(("ucs16 %" SIZE_T_FORMAT "u\n", *ulen));
@ -249,64 +260,40 @@ private char text_chars[256] = {
 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
 };
-private int
+#define LOOKS(NAME, COND) \
-looks_ascii(const unsigned char *buf, size_t nbytes, unicodechar *ubuf,
+private int \
-    size_t *ulen)
+looks_ ## NAME(const unsigned char *buf, size_t nbytes, file_unichar_t *ubuf, \
-{
+    size_t *ulen) \
-	size_t i;
+{ \
-
+	size_t i, u; \
-	*ulen = 0;
+	unsigned char dist[256]; \
-
+	memset(dist, 0, sizeof(dist)); \
-	for (i = 0; i < nbytes; i++) {
+\
-		int t = text_chars[buf[i]];
+	*ulen = 0; \
-
+\
-		if (t != T)
+	for (i = 0; i < nbytes; i++) { \
-			return 0;
+		int t = text_chars[buf[i]]; \
-
+\
-		ubuf[(*ulen)++] = buf[i];
+		if (COND) \
-	}
+			return 0; \
-
+\
-	return 1;
+		ubuf[(*ulen)++] = buf[i]; \
 		dist[buf[i]]++; \
 	} \
 	u = 0; \
 	for (i = 0; i < __arraycount(dist); i++) { \
 		if (dist[i]) \
 			u += dist[i]; \
 	} \
 	if (u < 3) \
 		return 0; \
 \
 	return 1; \
 }
-private int
+LOOKS(ascii, t != T)
-looks_latin1(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size_t *ulen)
+LOOKS(latin1, t != T && t != I)
-{
+LOOKS(extended, t != T && t != I && t != X)
 	size_t i;
 	*ulen = 0;
 	for (i = 0; i < nbytes; i++) {
 		int t = text_chars[buf[i]];
 		if (t != T && t != I)
 			return 0;
 		ubuf[(*ulen)++] = buf[i];
 	}
 	return 1;
 }
 private int
 looks_extended(const unsigned char *buf, size_t nbytes, unicodechar *ubuf,
    size_t *ulen)
 {
 	size_t i;
 	*ulen = 0;
 	for (i = 0; i < nbytes; i++) {
 		int t = text_chars[buf[i]];
 		if (t != T && t != I && t != X)
 			return 0;
 		ubuf[(*ulen)++] = buf[i];
 	}
 	return 1;
 }
 /*
 * Decide whether some text looks like UTF-8. Returns:
@ -319,12 +306,65 @@ looks_extended(const unsigned char *buf, size_t nbytes, unicodechar *ubuf,
 * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen;
 * ubuf must be big enough!
 */
 // UTF-8 lead-byte classification tables.
 // from: https://golang.org/src/unicode/utf8/utf8.go
 //
 // Each first[] entry packs two nibbles: the high nibble is an index into
 // accept_ranges[] (the "accept" number below), the low nibble is the
 // sequence size noted below.
 #define	XX 0xF1 // invalid: size 1
 #define	AS 0xF0 // ASCII: size 1
 #define	S1 0x02 // accept 0, size 2
 #define	S2 0x13 // accept 1, size 3
 #define	S3 0x03 // accept 0, size 3
 #define	S4 0x23 // accept 2, size 3
 #define	S5 0x34 // accept 3, size 4
 #define	S6 0x04 // accept 0, size 4
 #define	S7 0x44 // accept 4, size 4
 // Default lowest/highest value accepted for a continuation byte.
 #define LOCB 0x80
 #define HICB 0xBF
 // first is information about the first byte in a UTF-8 sequence.
 static const uint8_t first[] = {
    //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x00-0x0F
    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x10-0x1F
    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x20-0x2F
    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x30-0x3F
    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x40-0x4F
    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x50-0x5F
    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x60-0x6F
    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x70-0x7F
    //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x80-0x8F
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x90-0x9F
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xA0-0xAF
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xB0-0xBF
    XX, XX, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, // 0xC0-0xCF
    S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, // 0xD0-0xDF
    S2, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S4, S3, S3, // 0xE0-0xEF
    S5, S6, S6, S6, S7, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xF0-0xFF
 };
 // accept_ranges gives the range of valid values for the second byte in a
 // UTF-8 sequence. It has size 16 so that any 4-bit index (the high nibble
 // of a first[] entry) is in bounds; the slots not listed explicitly are
 // zero-initialized.
 // The table is read-only lookup data, so it is file-local and const.
 static const struct accept_range {
 	uint8_t lo; // lowest value for second byte.
 	uint8_t hi; // highest value for second byte.
 } accept_ranges[16] = {
 	{ LOCB, HICB },	// accept 0
 	{ 0xA0, HICB },	// accept 1
 	{ LOCB, 0x9F },	// accept 2
 	{ 0x90, HICB },	// accept 3
 	{ LOCB, 0x8F },	// accept 4
 };
 protected int
-file_looks_utf8(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size_t *ulen)
+file_looks_utf8(const unsigned char *buf, size_t nbytes, file_unichar_t *ubuf,
    size_t *ulen)
 {
 	size_t i;
 	int n;
-	unicodechar c;
+	file_unichar_t c;
 	int gotone = 0, ctrl = 0;
 	if (ubuf)
@ -346,6 +386,10 @@ file_looks_utf8(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size
 			return -1;
 		} else {			   /* 11xxxxxx begins UTF-8 */
 			int following;
 			uint8_t x = first[buf[i]];
 			const struct accept_range *ar = &accept_ranges[x >> 4];
 			if (x == XX)
 				return -1;
 			if ((buf[i] & 0x20) == 0) {		/* 110xxxxx */
 				c = buf[i] & 0x1f;
@ -370,6 +414,10 @@ file_looks_utf8(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size
 				if (i >= nbytes)
 					goto done;
 				if (n == 0 &&
 				     (buf[i] < ar->lo || buf[i] > ar->hi))
 					return -1;
 				if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40))
 					return -1;
@ -391,8 +439,8 @@ done:
 * rest of the text.
 */
 private int
-looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unicodechar *ubuf,
+looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes,
-    size_t *ulen)
+    file_unichar_t *ubuf, size_t *ulen)
 {
 	if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf)
 		return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
@ -401,7 +449,8 @@ looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unicodechar *ubuf,
 }
 private int
-looks_utf7(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size_t *ulen)
+looks_utf7(const unsigned char *buf, size_t nbytes, file_unichar_t *ubuf,
    size_t *ulen)
 {
 	if (nbytes > 4 && buf[0] == '+' && buf[1] == '/' && buf[2] == 'v')
 		switch (buf[3]) {
@ -420,7 +469,7 @@ looks_utf7(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size_t *u
 }
 private int
-looks_ucs16(const unsigned char *bf, size_t nbytes, unicodechar *ubf,
+looks_ucs16(const unsigned char *bf, size_t nbytes, file_unichar_t *ubf,
    size_t *ulen)
 {
 	int bigend;
@ -443,10 +492,10 @@ looks_ucs16(const unsigned char *bf, size_t nbytes, unicodechar *ubf,
 		if (bigend)
 			ubf[(*ulen)++] = bf[i + 1]
-			    | (CAST(unicodechar, bf[i]) << 8);
+			    | (CAST(file_unichar_t, bf[i]) << 8);
 		else
 			ubf[(*ulen)++] = bf[i]
-			    | (CAST(unicodechar, bf[i + 1]) << 8);
+			    | (CAST(file_unichar_t, bf[i + 1]) << 8);
 		if (ubf[*ulen - 1] == 0xfffe)
 			return 0;
@ -459,7 +508,7 @@ looks_ucs16(const unsigned char *bf, size_t nbytes, unicodechar *ubf,
 }
 private int
-looks_ucs32(const unsigned char *bf, size_t nbytes, unicodechar *ubf,
+looks_ucs32(const unsigned char *bf, size_t nbytes, file_unichar_t *ubf,
    size_t *ulen)
 {
 	int bigend;
@ -481,15 +530,15 @@ looks_ucs32(const unsigned char *bf, size_t nbytes, unicodechar *ubf,
 		/* XXX fix to properly handle chars > 65536 */
 		if (bigend)
-			ubf[(*ulen)++] = CAST(unicodechar, bf[i + 3])
+			ubf[(*ulen)++] = CAST(file_unichar_t, bf[i + 3])
-			    | (CAST(unicodechar, bf[i + 2]) << 8)
+			    | (CAST(file_unichar_t, bf[i + 2]) << 8)
-			    | (CAST(unicodechar, bf[i + 1]) << 16)
+			    | (CAST(file_unichar_t, bf[i + 1]) << 16)
-			    | (CAST(unicodechar, bf[i]) << 24);
+			    | (CAST(file_unichar_t, bf[i]) << 24);
 		else
-			ubf[(*ulen)++] = CAST(unicodechar, bf[i + 0])
+			ubf[(*ulen)++] = CAST(file_unichar_t, bf[i + 0])
-			    | (CAST(unicodechar, bf[i + 1]) << 8) 
+			    | (CAST(file_unichar_t, bf[i + 1]) << 8) 
-			    | (CAST(unicodechar, bf[i + 2]) << 16)
+			    | (CAST(file_unichar_t, bf[i + 2]) << 16)
-			    | (CAST(unicodechar, bf[i + 3]) << 24);
+			    | (CAST(file_unichar_t, bf[i + 3]) << 24);
 		if (ubf[*ulen - 1] == 0xfffe)
 			return 0;
--- a/ext/fileinfo/libmagic/file.h
+++ b/ext/fileinfo/libmagic/file.h
@ -27,7 +27,7 @@
 */
 /*
 * file.h - definitions for file(1) program
- * @(#)$File: file.h,v 1.220 2020/06/08 17:38:27 christos Exp $
+ * @(#)$File: file.h,v 1.225 2021/02/05 22:29:07 christos Exp $
 */
 #ifndef __file_h__
@ -35,6 +35,7 @@
 #include "config.h"
 #include "php.h"
 #include "ext/standard/php_string.h"
 #include "ext/pcre/php_pcre.h"
@ -136,6 +137,14 @@
 #define	MAX(a,b)	(((a) > (b)) ? (a) : (b))
 #endif
 /*
  * Fallback definitions for builds whose system headers do not provide
  * the close-on-exec flags.  O_CLOEXEC falls back to 0, so OR-ing it into
  * a flag word changes nothing.
  * NOTE(review): FD_CLOEXEC falls back to 1 -- confirm this matches the
  * value expected by the target platform's fcntl().
  */
 #ifndef O_CLOEXEC
 # define O_CLOEXEC 0
 #endif
 #ifndef FD_CLOEXEC
 # define FD_CLOEXEC 1
 #endif
 #define FILE_BADSIZE CAST(size_t, ~0ul)
 #define MAXDESC	64		/* max len of text description/MIME type */
 #define MAXMIME	80		/* max len of text MIME type */
@ -403,14 +412,16 @@ struct level_info {
 #endif
 };
 /*
  * Container pairing a level_info array with a length; embedded in
  * struct magic_set as member `c'.
  */
 struct cont {
 	size_t len;		/* presumably the number of entries in li -- TODO confirm */
 	struct level_info *li;	/* level_info entries; allocation is handled elsewhere */
 };
 #define MAGIC_SETS	2
 struct magic_set {
 	struct mlist *mlist[MAGIC_SETS];	/* list of regular entries */
-	struct cont {
+	struct cont c;
 		size_t len;
 		struct level_info *li;
 	} c;
 	struct out {
 		char *buf;		/* Accumulation buffer */
 		size_t blen;		/* Length of buffer */
@ -445,6 +456,7 @@ struct magic_set {
 	uint16_t elf_notes_max;
 	uint16_t regex_max;
 	size_t bytes_max;		/* number of bytes to read from file */
 	size_t encoding_max;		/* bytes to look for encoding */
 #ifndef FILE_BYTES_MAX
 # define FILE_BYTES_MAX (1024 * 1024)	/* how much of the file to look at */
 #endif
@ -454,11 +466,13 @@ struct magic_set {
 #define	FILE_INDIR_MAX			50
 #define	FILE_NAME_MAX			50
 #define	FILE_REGEX_MAX			8192
 #define	FILE_ENCODING_MAX		(64 * 1024)
 };
 /* Type for Unicode characters */
-typedef unsigned long unicodechar;
+typedef unsigned long file_unichar_t;
 struct stat;
 #define FILE_T_LOCAL	1
 #define FILE_T_WINDOWS	2
 protected const char *file_fmttime(char *, size_t, uint64_t, int);
@ -468,6 +482,8 @@ protected int file_buffer(struct magic_set *, php_stream *, zend_stat_t *, const
    size_t);
 protected int file_fsmagic(struct magic_set *, const char *, zend_stat_t *);
 protected int file_pipe2file(struct magic_set *, int, const void *, size_t);
 protected int file_vprintf(struct magic_set *, const char *, va_list)
    __attribute__((__format__(__printf__, 2, 0)));
 protected int file_separator(struct magic_set *);
 protected char *file_copystr(char *, size_t, size_t, const char *);
 protected int file_checkfmt(char *, size_t, const char *);
@ -486,15 +502,17 @@ protected int file_zmagic(struct magic_set *, const struct buffer *,
 protected int file_ascmagic(struct magic_set *, const struct buffer *,
    int);
 protected int file_ascmagic_with_encoding(struct magic_set *,
-    const struct buffer *, unicodechar *, size_t, const char *, const char *, int);
+    const struct buffer *, file_unichar_t *, size_t, const char *, const char *, int);
 protected int file_encoding(struct magic_set *, const struct buffer *,
-    unicodechar **, size_t *, const char **, const char **, const char **);
+    file_unichar_t **, size_t *, const char **, const char **, const char **);
 protected int file_is_json(struct magic_set *, const struct buffer *);
 protected int file_is_csv(struct magic_set *, const struct buffer *, int);
 protected int file_is_tar(struct magic_set *, const struct buffer *);
 protected int file_softmagic(struct magic_set *, const struct buffer *,
    uint16_t *, uint16_t *, int, int);
 protected int file_apprentice(struct magic_set *, const char *, int);
 protected int buffer_apprentice(struct magic_set *, struct magic **,
    size_t *, size_t);
 protected int file_magicfind(struct magic_set *, const char *, struct mlist *);
 protected uint64_t file_signextend(struct magic_set *, struct magic *,
    uint64_t);
@ -510,7 +528,7 @@ protected size_t file_mbswidth(const char *);
 protected const char *file_getbuffer(struct magic_set *);
 protected ssize_t sread(int, void *, size_t, int);
 protected int file_check_mem(struct magic_set *, unsigned int);
-protected int file_looks_utf8(const unsigned char *, size_t, unicodechar *,
+protected int file_looks_utf8(const unsigned char *, size_t, file_unichar_t *,
    size_t *);
 protected size_t file_pstring_length_size(struct magic_set *,
    const struct magic *);
@ -521,6 +539,9 @@ protected char * file_printable(char *, size_t, const char *, size_t);
 protected int file_os2_apptype(struct magic_set *, const char *, const void *,
    size_t);
 #endif /* __EMX__ */
 protected int file_pipe_closexec(int *);
 protected int file_clear_closexec(int);
 protected char *file_strtrim(char *);
 protected void buffer_init(struct buffer *, int, const zend_stat_t *,
    const void *, size_t);
--- a/ext/fileinfo/libmagic/funcs.c
+++ b/ext/fileinfo/libmagic/funcs.c
@ -27,7 +27,7 @@
 #include "file.h"
 #ifndef	lint
-FILE_RCSID("@(#)$File: funcs.c,v 1.115 2020/02/20 15:50:20 christos Exp $")
+FILE_RCSID("@(#)$File: funcs.c,v 1.121 2021/02/05 22:29:07 christos Exp $")
 #endif	/* lint */
 #include "magic.h"
@ -36,6 +36,9 @@ FILE_RCSID("@(#)$File: funcs.c,v 1.115 2020/02/20 15:50:20 christos Exp $")
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>	/* for pipe2() */
 #endif
 #if defined(HAVE_WCHAR_H)
 #include <wchar.h>
 #endif
@ -100,7 +103,7 @@ file_checkfmt(char *msg, size_t mlen, const char *fmt)
 		if (*++p == '%')
 			continue;
 		// Skip uninteresting.
-		while (strchr("0.'+- ", *p) != NULL)
+		while (strchr("#0.'+- ", *p) != NULL)
 			p++;
 		if (*p == '*') {
 			if (msg)
@ -126,27 +129,56 @@ file_checkfmt(char *msg, size_t mlen, const char *fmt)
 	return 0;
 }
 /*
  * Like printf, only we append to a buffer.
  *
  * Formats `fmt' with `ap' and appends the result to ms->o.buf, keeping
  * ms->o.blen equal to the length of the accumulated string.
  *
  * Returns 0 on success, and also 0 without doing anything when an error
  * has already been recorded (EVENT_HAD_ERR).  Returns -1 after reporting
  * an error when the format is rejected by file_checkfmt(), when a single
  * formatted piece is longer than 1024 bytes, or when the accumulated
  * output would exceed 1024 * 1024 bytes.
  */
 protected int
 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap)
 {
 	size_t len;
 	char *buf = NULL, *newstr;
 	char tbuf[1024];
 	if (ms->event_flags & EVENT_HAD_ERR)
 		return 0;
 	if (file_checkfmt(tbuf, sizeof(tbuf), fmt)) {
 		file_clearbuf(ms);
 		file_error(ms, 0, "Bad magic format `%s' (%s)", fmt, tbuf);
 		return -1;
 	}
 	len = vspprintf(&buf, 0, fmt, ap);
 	if (len > 1024 || len + ms->o.blen > 1024 * 1024) {
 		/* Save the old length: file_clearbuf() is called before it is reported. */
 		size_t blen = ms->o.blen;
 		if (buf) efree(buf);
 		file_clearbuf(ms);
 		/* Both values are size_t, so both need %zu (was %d for len). */
 		file_error(ms, 0, "Output buffer space exceeded %zu+%zu", len,
 		    blen);
 		return -1;
 	}
 	if (ms->o.buf != NULL) {
 		/* Append: build "old + new", then release both pieces. */
 		len = spprintf(&newstr, 0, "%s%s", ms->o.buf, buf);
 		efree(buf);
 		efree(ms->o.buf);
 		buf = newstr;
 	}
 	ms->o.buf = buf;
 	ms->o.blen = len;
 	return 0;
 }
 protected int
 file_printf(struct magic_set *ms, const char *fmt, ...)
 {
 	int rv;
 	va_list ap;
 	char *buf = NULL, *newstr;
 	va_start(ap, fmt);
-	vspprintf(&buf, 0, fmt, ap);
+	rv = file_vprintf(ms, fmt, ap);
 	va_end(ap);
-
+	return rv;
 	if (ms->o.buf != NULL) {
 		spprintf(&newstr, 0, "%s%s", ms->o.buf, (buf ? buf : ""));
 		if (buf) {
 			efree(buf);
 		}
 		efree(ms->o.buf);
 		ms->o.buf = newstr;
 	} else {
 		ms->o.buf = buf;
 	}
 	return 0;
 }
 /*
@ -157,30 +189,18 @@ private void
 file_error_core(struct magic_set *ms, int error, const char *f, va_list va,
    size_t lineno)
 {
 	char *buf = NULL;
 	/* Only the first error is ok */
 	if (ms->event_flags & EVENT_HAD_ERR)
 		return;
 	if (lineno != 0) {
-		efree(ms->o.buf);
+		file_clearbuf(ms);
-		ms->o.buf = NULL;
+		(void)file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno);
 		file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno);
 	}
-
+	if (ms->o.buf && *ms->o.buf)
-	vspprintf(&buf, 0, f, va);
+		(void)file_printf(ms, " ");
-	va_end(va);
+	(void)file_vprintf(ms, f, va);
-
+	if (error > 0)
-	if (error > 0) {
+		(void)file_printf(ms, " (%s)", strerror(error));
 		file_printf(ms, "%s (%s)", (*buf ? buf : ""), strerror(error));
 	} else if (*buf) {
 		file_printf(ms, "%s", buf);
 	}
 	if (buf) {
 		efree(buf);
 	}
 	ms->event_flags |= EVENT_HAD_ERR;
 	ms->error = error;
 }
@ -228,11 +248,31 @@ file_badread(struct magic_set *ms)
 }
 #ifndef COMPILE_ONLY
 #define FILE_SEPARATOR "\n- "
 protected int
 file_separator(struct magic_set *ms)
 {
-	return file_printf(ms, "\n- ");
+	return file_printf(ms, FILE_SEPARATOR);
 }
 /*
 * Remove a trailing FILE_SEPARATOR from the output buffer, if present.
 *
 * Does nothing when there is no buffer, when the buffer does not end in
 * the separator, or when the text is not longer than the separator
 * itself.  When the separator is removed, ms->o.blen is updated as well
 * so that the cached length keeps matching the string.
 */
 static void
 trim_separator(struct magic_set *ms)
 {
 	size_t l;
 	if (ms->o.buf == NULL)
 		return;
 	l = strlen(ms->o.buf);
 	/* sizeof() counts the NUL: text of separator length or less is kept. */
 	if (l < sizeof(FILE_SEPARATOR))
 		return;
 	/* l now indexes where a trailing separator would start. */
 	l -= sizeof(FILE_SEPARATOR) - 1;
 	if (strcmp(ms->o.buf + l, FILE_SEPARATOR) != 0)
 		return;
 	ms->o.buf[l] = '\0';
 	/* Keep the cached length in step with the shortened string. */
 	ms->o.blen = l;
 }
 static int
@ -450,6 +490,7 @@ simple:
 				rv = -1;
 	}
 done:
 	trim_separator(ms);
 	if ((ms->flags & MAGIC_MIME_ENCODING) != 0) {
 		if (ms->flags & MAGIC_MIME_TYPE)
 			if (file_printf(ms, "; charset=") == -1)
@ -598,7 +639,7 @@ file_check_mem(struct magic_set *ms, unsigned int level)
 protected size_t
 file_printedlen(const struct magic_set *ms)
 {
-	return ms->o.buf == NULL ? 0 : strlen(ms->o.buf);
+	return ms->o.blen;
 }
 protected int
@ -717,7 +758,7 @@ struct guid {
 protected int
 file_parse_guid(const char *s, uint64_t *guid)
 {
-	struct guid *g = CAST(struct guid *, guid);
+	struct guid *g = CAST(struct guid *, CAST(void *, guid));
 	return sscanf(s,
 	    "%8x-%4hx-%4hx-%2hhx%2hhx-%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx",
 	    &g->data1, &g->data2, &g->data3, &g->data4[0], &g->data4[1],
@ -728,7 +769,8 @@ file_parse_guid(const char *s, uint64_t *guid)
 protected int
 file_print_guid(char *str, size_t len, const uint64_t *guid)
 {
-	const struct guid *g = CAST(const struct guid *, guid);
+	const struct guid *g = CAST(const struct guid *,
 	    CAST(const void *, guid));
 	return snprintf(str, len, "%.8X-%.4hX-%.4hX-%.2hhX%.2hhX-"
 	    "%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX",
@ -736,3 +778,39 @@ file_print_guid(char *str, size_t len, const uint64_t *guid)
 	    g->data4[2], g->data4[3], g->data4[4], g->data4[5],
 	    g->data4[6], g->data4[7]);
 }
 /*
 * Create a pipe in fds[0]/fds[1] with FD_CLOEXEC requested on both ends.
 * Returns the pipe2() result when HAVE_PIPE2 is defined; otherwise -1 if
 * pipe() fails and 0 once the pipe exists.
 */
 protected int
 file_pipe_closexec(int *fds)
 {
 #ifdef HAVE_PIPE2
 	return pipe2(fds, O_CLOEXEC);
 #else
 	int i;
 	if (pipe(fds) == -1)
 		return -1;
 	/* Best effort: the fcntl() results are deliberately ignored. */
 	for (i = 0; i < 2; i++)
 		(void)fcntl(fds[i], F_SETFD, FD_CLOEXEC);
 	return 0;
 #endif
 }
 /*
 * Set the descriptor flags of fd to 0, which drops FD_CLOEXEC.
 * Returns whatever fcntl() returns.
 */
 protected int
 file_clear_closexec(int fd)
 {
 	int rv;
 	rv = fcntl(fd, F_SETFD, 0);
 	return rv;
 }
 /*
 * Trim leading and trailing whitespace from str, in place.
 *
 * Leading whitespace is skipped by advancing the returned pointer;
 * trailing whitespace is cut by writing a NUL after the last
 * non-whitespace character.  Returns a pointer into the original buffer.
 * An empty or all-whitespace string yields a pointer to its terminating
 * NUL and the buffer is left unmodified.
 */
 protected char *
 file_strtrim(char *str)
 {
 	char *last;
 	while (isspace(CAST(unsigned char, *str)))
 		str++;
 	last = str;
 	while (*last)
 		last++;
 	/*
 	 * Nothing left after the leading blanks: stepping back from here
 	 * would scan (and write) before the start of the string.
 	 */
 	if (last == str)
 		return str;
 	--last;
 	/* *str is a non-blank character here, so this stops at or after str. */
 	while (isspace(CAST(unsigned char, *last)))
 		last--;
 	*++last = '\0';
 	return str;
 }
--- a/ext/fileinfo/libmagic/is_csv.c
+++ b/ext/fileinfo/libmagic/is_csv.c
@ -32,7 +32,7 @@
 #include "file.h"
 #ifndef lint
-FILE_RCSID("@(#)$File: is_csv.c,v 1.4 2019/06/26 20:31:31 christos Exp $")
+FILE_RCSID("@(#)$File: is_csv.c,v 1.6 2020/08/09 16:43:36 christos Exp $")
 #endif
 #include <string.h>
@ -94,8 +94,7 @@ csv_parse(const unsigned char *uc, const unsigned char *ue)
 	size_t nf = 0, tf = 0, nl = 0;
 	while (uc < ue) {
-		unsigned char c;
+		switch (*uc++) {
 		switch (c = *uc++) {
 		case '"':
 			// Eat until the matching quote
 			uc = eatquote(uc, ue);
@ -150,7 +149,7 @@ file_is_csv(struct magic_set *ms, const struct buffer *b, int looks_text)
 		return 1;
 	if (mime) {
-		if (file_printf(ms, "application/csv") == -1)
+		if (file_printf(ms, "text/csv") == -1)
 			return -1;
 		return 1;
 	}
--- a/ext/fileinfo/libmagic/magic.c
+++ b/ext/fileinfo/libmagic/magic.c
@ -28,7 +28,7 @@
 #include "file.h"
 #ifndef	lint
-FILE_RCSID("@(#)$File: magic.c,v 1.112 2020/06/08 19:44:10 christos Exp $")
+FILE_RCSID("@(#)$File: magic.c,v 1.114 2021/02/05 21:33:49 christos Exp $")
 #endif	/* lint */
 #include "magic.h"
@ -348,6 +348,9 @@ magic_setparam(struct magic_set *ms, int param, const void *val)
 	case MAGIC_PARAM_BYTES_MAX:
 		ms->bytes_max = *CAST(const size_t *, val);
 		return 0;
 	case MAGIC_PARAM_ENCODING_MAX:
 		ms->encoding_max = *CAST(const size_t *, val);
 		return 0;
 	default:
 		errno = EINVAL;
 		return -1;
@ -381,6 +384,9 @@ magic_getparam(struct magic_set *ms, int param, void *val)
 	case MAGIC_PARAM_BYTES_MAX:
 		*CAST(size_t *, val) = ms->bytes_max;
 		return 0;
 	case MAGIC_PARAM_ENCODING_MAX:
 		*CAST(size_t *, val) = ms->encoding_max;
 		return 0;
 	default:
 		errno = EINVAL;
 		return -1;
--- a/ext/fileinfo/libmagic/magic.h
+++ b/ext/fileinfo/libmagic/magic.h
@ -113,7 +113,7 @@ b\31transp_compression\0\
 #define	MAGIC_NO_CHECK_FORTRAN	0x000000 /* Don't check ascii/fortran */
 #define	MAGIC_NO_CHECK_TROFF	0x000000 /* Don't check ascii/troff */
-#define MAGIC_VERSION		539	/* This implementation */
+#define MAGIC_VERSION		540	/* This implementation */
 #ifdef __cplusplus
@ -150,6 +150,7 @@ int magic_errno(magic_t);
 #define MAGIC_PARAM_ELF_NOTES_MAX	4
 #define MAGIC_PARAM_REGEX_MAX		5
 #define	MAGIC_PARAM_BYTES_MAX		6
 #define	MAGIC_PARAM_ENCODING_MAX	7
 int magic_setparam(magic_t, int, const void *);
 int magic_getparam(magic_t, int, void *);
--- a/ext/fileinfo/libmagic/patchlevel.h
+++ b/ext/fileinfo/libmagic/patchlevel.h
@ -1,390 +0,0 @@
 #define	FILE_VERSION_MAJOR	5
 #define	patchlevel		37
 /*
 * Patchlevel file for Ian Darwin's MAGIC command.
 * $File: patchlevel.h,v 1.68 2008/03/22 21:39:43 christos Exp $
 *
 * $Log$
 * Revision 2.1  2019/05/30 22:27:12 ab
 * Update libmagic to 5.37
 *
 * $Log$
 * Revision 2.1  2018/04/26 22:27:12 ab
 * Update libmagic to 5.33
 *
 * $Log$
 * Revision 2.0  2017/10/11 22:27:12 ab
 * Update libmagic to 5.31
 *
 * $Log$
 * Revision 1.9  2016/11/24 22:27:12 ab
 * Update libmagic to 5.29
 *
 * $Log$
 * Revision 1.9  2016/10/11 22:27:12 ab
 * Update libmagic to 5.28
 *
 * $Log$
 * Revision 1.9  2015/03/06 22:27:12 ab
 * Update libmagic to 5.2X
 *
 * $Log$
 * Revision 1.8  2014/02/18 22:27:12 ab
 * Update libmagic to 5.17
 *
 * $Log$
 * Revision 1.7  2013/03/26 22:27:12 ab
 * Update libmagic to 5.14
 *
 * $Log$
 * Revision 1.6  2012/03/26 21:01:37 ab
 * Update libmagic to 5.11
 *
 * Revision 1.5  2012/03/25 13:54:37  ab
 * Update libmagic to 5.04
 *
 * Revision 1.4  2009/05/04 20:52:43  scottmac
 * Update libmagic to 5.02
 *
 * Revision 1.3  2009/03/15 23:02:35  scottmac
 * Update fileinfo to libmagic 5.00 and remove dependency on dirent.h on Windows
 *
 * Revision 1.2  2008/11/02 16:09:27  scottmac
 * Update libmagic to 4.26 and add support for v6 of the magic file format.
 *
 * Revision 1.1  2008/07/11 14:13:50  derick
 * - Move lib to libmagic
 *
 * Revision 1.1  2008/07/11 14:10:50  derick
 * - Step one for bundling the libmagic library. Some config.m4 issues left.
 *
 * Revision 1.69  2008/07/02 15:27:05  christos
 * welcome to 4.25
 *
 * Revision 1.68  2008/03/22 21:39:43  christos
 * file 4.24
 *
 * Revision 1.67  2007/12/28 20:08:40  christos
 * welcome to 4.23.
 *
 * Revision 1.66  2007/12/27 16:38:24  christos
 * welcome to 4.22
 *
 * Revision 1.65  2007/05/24 17:22:27  christos
 * Welcome to 4.21
 *
 * Revision 1.64  2007/03/01 22:14:55  christos
 * welcome to 4.20
 *
 * Revision 1.63  2007/01/12 17:38:28  christos
 * Use File id.
 *
 * Revision 1.62  2006/12/11 21:49:58  christos
 * time for 4.19
 *
 * Revision 1.61  2006/10/31 21:18:09  christos
 * bump
 *
 * Revision 1.60  2006/03/02 22:15:12  christos
 * welcome to 4.17
 *
 * Revision 1.59  2005/10/17 17:15:21  christos
 * welcome to 4.16
 *
 * Revision 1.58  2005/08/18 15:52:56  christos
 * welcome to 4.15
 *
 * Revision 1.57  2005/06/25 15:52:14  christos
 * Welcome to 4.14
 *
 * Revision 1.56  2005/02/09 19:25:13  christos
 * Welcome to 4.13
 *
 * Revision 1.55  2004/11/24 18:57:47  christos
 * Re-do the autoconf stuff once more; passes make dist now.
 *
 * Revision 1.54  2004/11/21 05:52:05  christos
 * ready for 4.11
 *
 * Revision 1.53  2004/07/24 20:40:46  christos
 * welcome to 4.10
 *
 * Revision 1.52  2004/04/07 00:32:25  christos
 * welcome to 4.09
 *
 * Revision 1.51  2004/03/22 21:17:11  christos
 * welcome to 4.08.
 *
 * Revision 1.50  2003/12/23 17:34:04  christos
 * 4.07
 *
 * Revision 1.49  2003/10/15 02:08:27  christos
 * welcome to 4.06
 *
 * Revision 1.48  2003/09/12 19:41:14  christos
 * this is 4.04
 *
 * Revision 1.47  2003/05/23 21:38:21  christos
 * welcome to 4.03
 *
 * Revision 1.46  2003/04/02 18:57:43  christos
 * prepare for 4.02
 *
 * Revision 1.45  2003/03/26 15:37:25  christos
 * - Pass lint
 * - make NULL in magic_file mean stdin
 * - Fix "-" argument to file to pass NULL to magic_file
 * - avoid pointer casts by using memcpy
 * - rename magic_buf -> magic_buffer
 * - keep only the first error
 * - manual page: new sentence, new line
 * - fix typo in api function (magic_buf -> magic_buffer)
 *
 * Revision 1.44  2003/03/23 22:23:31  christos
 * finish librarification.
 *
 * Revision 1.43  2003/03/23 21:16:26  christos
 * update copyrights.
 *
 * Revision 1.42  2003/03/23 04:06:05  christos
 * Library re-organization
 *
 * Revision 1.41  2003/02/27 20:53:45  christos
 * - fix memory allocation problem (Jeff Johnson)
 * - fix stack overflow corruption (David Endler)
 * - fixes from NetBSD source (Antti Kantee)
 * - magic fixes
 *
 * Revision 1.40  2003/02/08 18:33:53  christos
 * - detect inttypes.h too (Dave Love <d.love@dl.ac.uk>)
 * - eliminate unsigned char warnings (Petter Reinholdtsen <pere@hungry.com>)
 * - better elf PT_NOTE handling (Nalin Dahyabhai <nalin@redhat.com>)
 * - add options to format the output differently
 * - much more magic.
 *
 * Revision 1.39  2002/07/03 18:57:52  christos
 * - ansify/c99ize
 * - more magic
 * - better COMPILE_ONLY support.
 * - new magic files.
 * - fix solaris compilation problems.
 *
 * Revision 1.38  2002/05/16 18:45:56  christos
 * - pt_note elf additions from NetBSD
 * - EMX os specific changes (Alexander Mai)
 * - stdint.h detection, acconfig.h fixes (Maciej W. Rozycki, Franz Korntner)
 * - regex file additions (Kim Cromie)
 * - getopt_long support and misc cleanups (Michael Piefel)
 * - many magic fixes and additions
 *
 * Revision 1.37  2001/09/03 14:44:22  christos
 * daylight/tm_isdst detection
 * magic fixes
 * don't eat the whole file if it has only nulls
 *
 * Revision 1.36  2001/07/22 21:04:15  christos
 * - magic fixes
 * - add new operators, pascal strings, UTC date printing, $HOME/.magic
 *   [from "Tom N Harris" <telliamed@mac.com>]
 *
 * Revision 1.35  2001/04/24 14:40:25  christos
 * - rename magic file sgi to mips and fix it
 * - add support for building magic.mgc
 * - portability fixes for mmap()
 * - try gzip before uncompress, because uncompress sometimes hangs
 * - be more conservative about pipe reads and writes
 * - many magic fixes
 *
 * Revision 1.34  2001/03/12 05:05:57  christos
 * - new compiled magic format
 * - lots of magic additions
 *
 * Revision 1.33  2000/11/13 00:30:50  christos
 * - wordperfect magic fix: freebsd pr 9388
 * - more msdos fixes from freebsd pr's 20131 and 20812
 * - sas and spss magic [Bruce Foster]
 * - mkinstalldirs [John Fremlin]
 * - sgi opengl fixes [Michael Pruett]
 * - netbsd magic fixes [Ignatios Souvatzis]
 * - audio additions [Michael Pruett]
 * - fix problem with non ansi RCSID [Andreas Ley]
 * - oggs magic [Felix von Leitner]
 * - gmon magic [Eugen Dedu]
 * - TNEF magic [Joomy]
 * - netpbm magic and misc other image stuff [Bryan Henderson]
 *
 * Revision 1.32  2000/08/05 18:24:18  christos
 * Correct endianness detection in elf (Charles Hannum)
 * FreeBSD elf core support (Guy Harris)
 * Use gzip in systems that don't have uncompress (Anthon van der Neut)
 * Internationalization/EBCDIC support (Eric Fisher)
 * Many many magic changes
 *
 * Revision 1.31  2000/05/14 17:58:36  christos
 * - new magic for claris files
 * - new magic for mathematica and maple files
 * - new magic for msvc files
 * - new -k flag to keep going matching all possible entries
 * - add the word executable on #! magic files, and fix the usage of
 *   the word script
 * - lots of other magic fixes
 * - fix typo test -> text
 *
 * Revision 1.30  2000/04/11 02:41:17  christos
 * - add support for mime output (-i)
 * - make sure we free memory in case realloc fails
 * - magic fixes
 *
 * Revision 1.29  1999/11/28 20:02:29  christos
 * new string/[Bcb] magic from anthon, and adjustments to the magic files to
 * use it.
 *
 * Revision 1.28  1999/10/31 22:11:48  christos
 * - add "char" type for compatibility with HP/UX
 * - recognize HP/UX syntax &=n etc.
 * - include errno.h for CYGWIN
 * - conditionalize the S_IS* macros
 * - revert the SHT_DYNSYM test that broke the linux stripped binaries test
 * - lots of Magdir changes
 *
 * Revision 1.27  1999/02/14 17:21:41  christos
 * Automake support and misc cleanups from Rainer Orth
 * Enable reading character and block special files from Dale R. Worley
 *
 * Revision 1.26  1998/09/12 13:19:39  christos
 * - add support for bi-endian indirect offsets (Richard Verhoeven)
 * - add recognition for bcpl (Joseph Myers)
 * - remove non magic files from Magdir to avoid difficulties building
 *   on os2 where files are case independent
 * - magic fixes.
 *
 * Revision 1.25  1998/06/27 14:04:04  christos
 * OLF patch Guy Harris
 * Recognize java/html (debian linux)
 * Const poisoning (debian linux)
 * More magic!
 *
 * Revision 1.24  1998/02/15 23:20:38  christos
 * Autoconf patch: Felix von Leitner <leitner@math.fu-berlin.de>
 * More magic fixes
 * Elf64 fixes
 *
 * Revision 1.23  1997/11/05 16:03:37  christos
 * - correct elf prps offset for SunOS-2.5.1 [guy@netapp.com]
 * - handle 64 bit time_t's correctly [ewt@redhat.com]
 * - new mime style magic [clarosse@netvista.net]
 * - new TI calculator magic [rmcguire@freenet.columbus.oh.us]
 * - new figlet fonts [obrien@freebsd.org]
 * - new cisco magic, and elf fixes [jhawk@bbnplanet.com]
 * - -b flag addition, and x86 filesystem magic [vax@linkhead.paranoia.com]
 * - s/Mpeg/MPEG, header and elf typo fixes [guy@netapp.com]
 * - Windows/NT registry files, audio code [guy@netapp.com]
 * - libGrx graphics lib fonts [guy@netapp.com]
 * - PNG fixes [guy@netapp.com]
 * - more m$ document magic [guy@netapp.com]
 * - PPD files [guy@netapp.com]
 * - archive magic cleanup [guy@netapp.com]
 * - linux kernel magic cleanup [guy@netapp.com]
 * - lecter magic [guy@netapp.com]
 * - vgetty magic [guy@netapp.com]
 * - sniffer additions [guy@netapp.com]
 *
 * Revision 1.22  1997/01/15 17:23:24  christos
 * - add support for elf core files: find the program name under SVR4 [Ken Pizzini]
 * - print strings only up to the first carriage return [various]
 * - freebsd international ascii support [J Wunsch]
 * - magic fixes and additions [Guy Harris]
 * - 64 bit fixes [Larry Schwimmer]
 * - support for both utime and utimes, but don't restore file access times
 *   by default [various]
 * - \xXX only takes 2 hex digits, not 3.
 * - re-implement support for core files [Guy Harris]
 *
 * Revision 1.21  1996/10/05 18:15:29  christos
 * Segregate elf stuff and conditionally enable it with -DBUILTIN_ELF
 * More magic fixes
 *
 * Revision 1.20  1996/06/22  22:15:52  christos
 * - support relative offsets of the form >&
 * - fix bug with truncating magic strings that contain \n
 * - file -f - did not read from stdin as documented
 * - support elf file parsing using our own elf support.
 * - as always magdir fixes and additions.
 *
 * Revision 1.19  1995/10/27  23:14:46  christos
 * Ability to parse colon separated list of magic files
 * New LEGAL.NOTICE
 * Various magic file changes
 *
 * Revision 1.18  1995/05/20  22:09:21  christos
 * Passed incorrect argument to eatsize().
 * Use %ld and %lx where appropriate.
 * Remove unused variables
 * ELF support for both big and little endian
 * Fixes for small files again.
 *
 * Revision 1.17  1995/04/28  17:29:13  christos
 * - Incorrect nroff detection fix from der Mouse
 * - Lost and incorrect magic entries.
 * - Added ELF stripped binary detection [in C; ugh]
 * - Look for $MAGIC to find the magic file.
 * - Eat trailing size specifications from numbers i.e. ignore 10L
 * - More fixes for very short files
 *
 * Revision 1.16  1995/03/25  22:06:45  christos
 * - use strtoul() where it exists.
 * - fix sign-extend bug
 * - try to detect tar archives before nroff files, otherwise
 *   tar files where the first file starts with a . will not work
 *
 * Revision 1.15  1995/01/21  21:03:35  christos
 * Added CSECTION for the file man page
 * Added version flag -v
 * Fixed bug with -f input flag (from iorio@violet.berkeley.edu)
 * Lots of magic fixes and reorganization...
 *
 * Revision 1.14  1994/05/03  17:58:23  christos
 * changes from mycroft@gnu.ai.mit.edu (Charles Hannum) for unsigned
 *
 * Revision 1.13  1994/01/21  01:27:01  christos
 * Fixed null termination bug from Don Seeley at BSDI in ascmagic.c
 *
 * Revision 1.12  1993/10/27  20:59:05  christos
 * Changed -z flag to understand gzip format too.
 * Moved builtin compression detection to a table, and move
 * the compress magic entry out of the source.
 * Made printing of numbers unsigned, and added the mask to it.
 * Changed the buffer size to 8k, because gzip will refuse to
 * unzip just a few bytes.
 *
 * Revision 1.11  1993/09/24  18:49:06  christos
 * Fixed small bug in softmagic.c introduced by
 * copying the data to be examined out of the input
 * buffer. Changed the Makefile to use sed to create
 * the correct man pages.
 *
 * Revision 1.10  1993/09/23  21:56:23  christos
 * Passed purify. Fixed indirections. Fixed byte order printing.
 * Fixed segmentation faults caused by referencing past the end
 * of the magic buffer. Fixed bus errors caused by referencing
 * unaligned shorts or longs.
 *
 * Revision 1.9  1993/03/24  14:23:40  ian
 * Batch of minor changes from several contributors.
 *
 * Revision 1.8  93/02/19  15:01:26  ian
 * Numerous changes from Guy Harris too numerous to mention but including
 * byte-order independence, fixing "old-style masking", etc. etc. A bugfix
 * for broken symlinks from martin@@d255s004.zfe.siemens.de.
 *
 * Revision 1.7  93/01/05  14:57:27  ian
 * Couple of nits picked by Christos (again, thanks).
 *
 * Revision 1.6  93/01/05  13:51:09  ian
 * Lotsa work on the Magic directory.
 *
 * Revision 1.5  92/09/14  14:54:51  ian
 * Fix a tiny null-pointer bug in previous fix for tar archive + uncompress.
 *
 */
--- a/ext/fileinfo/libmagic/softmagic.c
+++ b/ext/fileinfo/libmagic/softmagic.c
@ -32,7 +32,7 @@
 #include "file.h"
 #ifndef	lint
-FILE_RCSID("@(#)$File: softmagic.c,v 1.299 2020/06/07 21:58:01 christos Exp $")
+FILE_RCSID("@(#)$File: softmagic.c,v 1.309 2021/02/05 22:29:07 christos Exp $")
 #endif	/* lint */
 #include "magic.h"
@ -169,6 +169,8 @@ file_fmtcheck(struct magic_set *ms, const char *desc, const char *def,
 #define F(a, b, c) ((b))
 #endif
 /* NOTE: this function has been kept at the state of 5.39 for backward
 * compatibility (BC). Revisit it when upgrading to 5.41 or above. */
 /*
 * Go through the whole list, stopping if you find a match.  Process all
 * the continuations of that match before returning.
@ -498,6 +500,28 @@ check_fmt(struct magic_set *ms, const char *fmt)
 	return rv;
 }
 #if !defined(HAVE_STRNDUP) || defined(__aiws__) || defined(_AIX)
 # if defined(__aiws__) || defined(_AIX)
 #  define strndup aix_strndup	/* aix is broken */
 # endif
 char *strndup(const char *, size_t);
 /*
 * Fallback strndup(): return a malloc()ed, NUL-terminated copy of at most
 * n bytes of str (fewer if str ends earlier), or NULL if malloc() fails.
 * The caller releases the copy with free().
 */
 char *
 strndup(const char *str, size_t n)
 {
 	size_t len = 0;
 	char *copy;
 	/* Count bytes up to the terminator or the n-byte limit. */
 	while (len != n && str[len] != '\0')
 		len++;
 	copy = malloc(len + 1);
 	if (copy == NULL)
 		return NULL;
 	(void)memcpy(copy, str, len);
 	copy[len] = '\0';
 	return copy;
 }
 #endif /* HAVE_STRNDUP */
 static int
 varexpand(struct magic_set *ms, char *buf, size_t len, const char *str)
 {
@ -569,93 +593,58 @@ mprint(struct magic_set *ms, struct magic *m)
 	else
 		desc = ebuf;
 /*
 * PRINTER(value, format, stype, utype): shared body for the numeric
 * `case' labels of the switch on m->type.
 *
 *   value   numeric field to print; passed through file_signextend()
 *   format  printf length-modifier string placed between "%" and the
 *           "u"/"d" conversion (e.g. "" or INT64_T_FORMAT)
 *   stype   signed type used when m->flag lacks UNSIGNED; its size also
 *           advances the offset stored in t
 *   utype   unsigned type used when m->flag has UNSIGNED
 *
 * Relies on ms, m, desc, v, buf and t from the enclosing scope.  If
 * check_fmt() returns -1 the enclosing function returns -1.  If it
 * returns 1 the number is first rendered into buf and then printed
 * through a "%s" format (presumably because desc asks for a string
 * conversion -- confirm against check_fmt()); otherwise the number is
 * printed directly.  A file_printf() failure also returns -1.
 *
 * The expansion ends in a bare `break', so it must be the last statement
 * of a case and the caller supplies the terminating semicolon.
 */
 #define	PRINTER(value, format, stype, utype)	\
 	v = file_signextend(ms, m, CAST(uint64_t, value)); \
 	switch (check_fmt(ms, desc)) { \
 	case -1: \
 		return -1; \
 	case 1: \
 		if (m->flag & UNSIGNED) { \
 			(void)snprintf(buf, sizeof(buf), "%" format "u", \
 			    CAST(utype, v)); \
 		} else { \
 			(void)snprintf(buf, sizeof(buf), "%" format "d", \
 			    CAST(stype, v)); \
 		} \
 		if (file_printf(ms, F(ms, desc, "%s"), buf) == -1) \
 			return -1; \
 		break; \
 	default: \
 		if (m->flag & UNSIGNED) { \
 		       if (file_printf(ms, F(ms, desc, "%" format "u"), \
 			   CAST(utype, v)) == -1) \
 			   return -1; \
 		} else { \
 		       if (file_printf(ms, F(ms, desc, "%" format "d"), \
 			   CAST(stype, v)) == -1) \
 			   return -1; \
 		} \
 		break; \
 	} \
 	t = ms->offset + sizeof(stype); \
 	break
  	switch (m->type) {
  	case FILE_BYTE:
-		v = file_signextend(ms, m, CAST(uint64_t, p->b));
+		PRINTER(p->b, "", int8_t, uint8_t);
 		switch (check_fmt(ms, desc)) {
 		case -1:
 			return -1;
 		case 1:
 			(void)snprintf(buf, sizeof(buf), "%d",
 			    CAST(unsigned char, v));
 			if (file_printf(ms, F(ms, desc, "%s"), buf) == -1)
 				return -1;
 			break;
 		default:
 			if (file_printf(ms, F(ms, desc, "%d"),
 			    CAST(unsigned char, v)) == -1)
 				return -1;
 			break;
 		}
 		t = ms->offset + sizeof(char);
 		break;
  	case FILE_SHORT:
  	case FILE_BESHORT:
  	case FILE_LESHORT:
-		v = file_signextend(ms, m, CAST(uint64_t, p->h));
+		PRINTER(p->h, "", int16_t, uint16_t);
 		switch (check_fmt(ms, desc)) {
 		case -1:
 			return -1;
 		case 1:
 			(void)snprintf(buf, sizeof(buf), "%u",
 			    CAST(unsigned short, v));
 			if (file_printf(ms, F(ms, desc, "%s"), buf) == -1)
 				return -1;
 			break;
 		default:
 			if (file_printf(ms, F(ms, desc, "%u"),
 			    CAST(unsigned short, v)) == -1)
 				return -1;
 			break;
 		}
 		t = ms->offset + sizeof(short);
 		break;
  	case FILE_LONG:
  	case FILE_BELONG:
  	case FILE_LELONG:
  	case FILE_MELONG:
-		v = file_signextend(ms, m, CAST(uint64_t, p->l));
+		PRINTER(p->l, "", int32_t, uint32_t);
 		switch (check_fmt(ms, desc)) {
 		case -1:
 			return -1;
 		case 1:
 			(void)snprintf(buf, sizeof(buf), "%u",
 			    CAST(uint32_t, v));
 			if (file_printf(ms, F(ms, desc, "%s"), buf) == -1)
 				return -1;
 			break;
 		default:
 			if (file_printf(ms, F(ms, desc, "%u"),
 			    CAST(uint32_t, v)) == -1)
 				return -1;
 			break;
 		}
 		t = ms->offset + sizeof(int32_t);
  		break;
  	case FILE_QUAD:
  	case FILE_BEQUAD:
  	case FILE_LEQUAD:
 	case FILE_OFFSET:
-		v = file_signextend(ms, m, p->q);
+		PRINTER(p->q, INT64_T_FORMAT, long long, unsigned long long);
 		switch (check_fmt(ms, desc)) {
 		case -1:
 			return -1;
 		case 1:
 			(void)snprintf(buf, sizeof(buf), "%" INT64_T_FORMAT "u",
 			    CAST(unsigned long long, v));
 			if (file_printf(ms, F(ms, desc, "%s"), buf) == -1)
 				return -1;
 			break;
 		default:
 			if (file_printf(ms, F(ms, desc, "%" INT64_T_FORMAT "u"),
 			    CAST(unsigned long long, v)) == -1)
 				return -1;
 			break;
 		}
 		t = ms->offset + sizeof(int64_t);
  		break;
  	case FILE_STRING:
@ -678,19 +667,9 @@ mprint(struct magic_set *ms, struct magic *m)
 			if (*m->value.s == '\0')
 				str[strcspn(str, "\r\n")] = '\0';
-			if (m->str_flags & STRING_TRIM) {
+			if (m->str_flags & STRING_TRIM)
-				char *last;
+				str = file_strtrim(str);
-				while (isspace(CAST(unsigned char, *str)))
+					
 					str++;
 				last = str;
 				while (*last)
 					last++;
 				--last;
 				while (isspace(CAST(unsigned char, *last)))
 					last--;
 				*++last = '\0';
 			}
 			if (file_printf(ms, F(ms, desc, "%s"),
 			    file_printable(sbuf, sizeof(sbuf), str,
 				sizeof(p->s) - (str - p->s))) == -1)
@ -795,14 +774,20 @@ mprint(struct magic_set *ms, struct magic *m)
 	case FILE_SEARCH:
 	case FILE_REGEX: {
-		char *cp;
+		char *cp, *scp;
 		int rval;
-		cp = estrndup(RCAST(const char *, ms->search.s),
+		cp = strndup(RCAST(const char *, ms->search.s),
 		    ms->search.rm_len);
 		if (cp == NULL) {
 			file_oomem(ms, ms->search.rm_len);
 			return -1;
 		}
 		scp = (m->str_flags & STRING_TRIM) ? file_strtrim(cp) : cp;
 		rval = file_printf(ms, F(ms, desc, "%s"),
-		    file_printable(sbuf, sizeof(sbuf), cp, ms->search.rm_len));
+		    file_printable(sbuf, sizeof(sbuf), scp, ms->search.rm_len));
-		efree(cp);
+		free(cp);
 		if (rval == -1)
 			return -1;
@ -955,6 +940,7 @@ moffset(struct magic_set *ms, struct magic *m, const struct buffer *b,
 	case FILE_DEFAULT:
 	case FILE_INDIRECT:
 	case FILE_OFFSET:
 	case FILE_USE:
 		o = ms->offset;
 		break;
@ -1541,6 +1527,28 @@ normal:
 	return 0;
 }
 /*
 * Save the current continuation state of ms into *c and give ms its own
 * malloc()ed copy of the level array to work on.
 * Returns 0 on success; returns -1 if the copy cannot be allocated, in
 * which case ms->c is left as it was.
 */
 private int
 save_cont(struct magic_set *ms, struct cont *c)
 {
 	struct level_info *dup;
 	size_t nbytes;
 	/* *c keeps the original state, including the original li pointer. */
 	*c = ms->c;
 	nbytes = c->len * sizeof(*c->li);
 	dup = CAST(struct level_info *, malloc(nbytes));
 	if (dup == NULL)
 		return -1;
 	memcpy(dup, c->li, nbytes);
 	ms->c.li = dup;
 	return 0;
 }
 /*
 * Undo save_cont(): free the level array currently held by ms and put
 * the continuation state stored in *c back into ms.
 */
 private void
 restore_cont(struct magic_set *ms, struct cont *c)
 {
 	void *scratch = ms->c.li;
 	ms->c = *c;
 	free(scratch);
 }
 private int
 mget(struct magic_set *ms, struct magic *m, const struct buffer *b,
    const unsigned char *s, size_t nbytes, size_t o, unsigned int cont_level,
@ -1548,14 +1556,15 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b,
    int *printed_something, int *need_separator, int *returnval,
    int *found_match)
 {
-	uint32_t offset = ms->offset;
+	uint32_t eoffset, offset = ms->offset;
 	struct buffer bb;
 	intmax_t lhs;
 	file_pushbuf_t *pb;
-	int rv, oneed_separator, in_type;
+	int rv, oneed_separator, in_type, nfound_match;
 	char *rbuf;
 	union VALUETYPE *p = &ms->ms_value;
 	struct mlist ml;
 	struct cont c;
 	if (*indir_count >= ms->indir_max) {
 		file_error(ms, 0, "indirect count (%hu) exceeded",
@ -1836,7 +1845,8 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b,
 		if (rv == 1) {
 			if ((ms->flags & MAGIC_NODESC) == 0 &&
-			    file_printf(ms, F(ms, m->desc, "%u"), offset) == -1) {
+			    file_printf(ms, F(ms, m->desc, "%u"), offset) == -1)
 			{
 				if (rbuf) efree(rbuf);
 				return -1;
 			}
@ -1860,16 +1870,32 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b,
 			file_error(ms, 0, "cannot find entry `%s'", rbuf);
 			return -1;
 		}
-		(*name_count)++;
+		if (save_cont(ms, &c) == -1) {
 			file_error(ms, errno, "can't allocate continuation");
 			return -1;
 		}
 		oneed_separator = *need_separator;
 		if (m->flag & NOSPACE)
 			*need_separator = 0;
 		nfound_match = 0;
 		(*name_count)++;
 		eoffset = ms->eoffset;
 		rv = match(ms, ml.magic, ml.nmagic, b, offset + o,
 		    mode, text, flip, indir_count, name_count,
-		    printed_something, need_separator, returnval, found_match);
+		    printed_something, need_separator, returnval,
 		    &nfound_match);
 		ms->ms_value.q = nfound_match;
 		(*name_count)--;
 		*found_match |= nfound_match;
 		restore_cont(ms, &c);
 		if (rv != 1)
 		    *need_separator = oneed_separator;
 		ms->offset = offset;
 		ms->eoffset = eoffset;
 		return rv;
 	case FILE_NAME:
@ -1934,13 +1960,10 @@ file_strncmp(const char *s1, const char *s2, size_t len, size_t maxlen,
 			}
 			else if ((flags & STRING_COMPACT_WHITESPACE) &&
 			    isspace(*a)) {
 				/* XXX Dirty. The data and the pattern is what is causing this.
 				       Revert _i for the next port and see if it still matters. */
 				uint32_t _i = 0;
 				a++;
 				if (isspace(*b++)) {
 					if (!isspace(*a))
-						while (EXPECTED(_i++ < 2048) && b < eb && isspace(*b))
+						while (b < eb && isspace(*b))
 							b++;
 				}
 				else {
@ -2282,9 +2305,10 @@ error_out:
 		}
 		break;
 	}
 	case FILE_INDIRECT:
 	case FILE_USE:
 		return ms->ms_value.q != 0;
 	case FILE_NAME:
 	case FILE_INDIRECT:
 		return 1;
 	case FILE_DER:
 		matched = der_cmp(ms, m);
--- a/ext/fileinfo/magicdata.patch
+++ b/ext/fileinfo/magicdata.patch
@ -1,99 +1,22 @@
-diff -u magic.orig/Magdir/images magic/Magdir/images
+diff -ur Magdir.orig/mail.news Magdir/mail.news
--- magic.orig/Magdir/images	2020-05-31 12:34:40.000000000 +0200
+--- Magdir.orig/mail.news	2021-03-31 01:47:28.000000000 +0200
-+++ magic/Magdir/images	2020-07-05 20:00:41.664783368 +0200
+++ Magdir/mail.news	2021-04-05 19:41:55.168556972 +0200
-@@ -1,6 +1,6 @@
+@@ -1,5 +1,5 @@
 #------------------------------------------------------------------------------
-# $File: images,v 1.181 2020/05/30 23:49:03 christos Exp $
+-# $File: mail.news,v 1.26 2021/03/21 14:37:03 christos Exp $
-+# $File: images,v 1.183 2020/06/26 17:08:32 christos Exp $
+# $File: mail.news,v 1.27 2021/04/05 16:36:14 christos Exp $
- # images:  file(1) magic for image formats (see also "iff", and "c-lang" for
+ # mail.news:  file(1) magic for mail and news
 # XPM bitmaps)
 #
-@@ -32,22 +32,22 @@
+ # Unfortunately, saved netnews also has From line added in some news software.
- # Prevent conflicts with CRI ADX.
+@@ -81,4 +81,4 @@
- >(2.S-2) belong	!0x28632943
+ # File format spec: https://wiki.dovecot.org/Design/Dcrypt/#File_format
- # skip more garbage like *.iso by looking for positive image type
+ # From: Stephen Gildea
->>2	ubyte			>0
+ 0	string	CRYPTED\003\007		Dovecot encrypted message
-+>2	ubyte			>0
+->9	byte	xu			\b, dcrypt version %d
- # skip some compiled terminfo like xterm+tmux by looking for image type less equal 33
+>9	byte	x			\b, dcrypt version %d
->>>2	ubyte			<34
+diff -ur Magdir.orig/rpm Magdir/rpm
-+>>2	ubyte			<34
+--- Magdir.orig/rpm	2021-02-23 01:49:24.000000000 +0100
- # skip arches.3200 , Finder.Root , Slp.1 by looking for low pixel depth 1 8 15 16 24 32
+++ Magdir/rpm	2021-04-05 19:40:55.080911893 +0200
 ->>>>16	ubyte			1
 ->>>>>0		use		tga-image
 ->>>>16	ubyte			8
 ->>>>>0		use		tga-image
 ->>>>16	ubyte			15
 ->>>>>0		use		tga-image
 ->>>>16	ubyte			16
 ->>>>>0		use		tga-image
 ->>>>16	ubyte			24
 ->>>>>0		use		tga-image
 ->>>>16	ubyte			32
 ->>>>>0		use		tga-image
 +>>>16	ubyte			1
 +>>>>0		use		tga-image
 +>>>16	ubyte			8
 +>>>>0		use		tga-image
 +>>>16	ubyte			15
 +>>>>0		use		tga-image
 +>>>16	ubyte			16
 +>>>>0		use		tga-image
 +>>>16	ubyte			24
 +>>>>0		use		tga-image
 +>>>16	ubyte			32
 +>>>>0		use		tga-image
 #	display tga bitmap image information
 0	name				tga-image
 >2	ubyte		<34		Targa image data
@@ -615,7 +615,7 @@
 0	leshort		40
 # skip bad samples like GAME by looking for valid number of color planes
 >12	uleshort	1		Device independent bitmap graphic
 -!:mime	image/bmp
 +!:mime	image/x-ms-bmp
 !:apple	????BMPp
 !:ext	dib
 >>4	lelong		x		\b, %d x
@@ -641,7 +641,7 @@
 >>18	leshort		x		\b, %d x
 >>20	leshort		x		%d
 >14	leshort		64		PC bitmap, OS/2 2.x format
 -!:mime	image/bmp
 +!:mime	image/x-ms-bmp
 !:apple	????BMPp
 !:ext	bmp
 # image width and height fields are unsigned integers for OS/2
@@ -662,7 +662,7 @@
 #>>(10.l) ubequad		!0	\b, bits 0x%16.16llx
 # BITMAPV2INFOHEADER	adds RGB bit masks
 >14	leshort		52		PC bitmap, Adobe Photoshop
 -!:mime	image/bmp
 +!:mime	image/x-ms-bmp
 !:apple	????BMPp
 !:ext	bmp
 >>18	lelong		x		\b, %d x
@@ -670,7 +670,7 @@
 >>28	leshort		x		%d
 # BITMAPV3INFOHEADER	adds alpha channel bit mask
 >14	leshort		56		PC bitmap, Adobe Photoshop with alpha channel mask
 -!:mime	image/bmp
 +!:mime	image/x-ms-bmp
 !:apple	????BMPp
 !:ext	bmp
 >>18	lelong		x		\b, %d x
@@ -679,7 +679,7 @@
 >14	leshort		40
 # jump 4 bytes before end of file/header to skip fmt-116-signature-id-118.dib
 >>(2.l-4)	ulong	x		PC bitmap, Windows 3.x format
 -!:mime	image/bmp
 +!:mime	image/x-ms-bmp
 !:apple	????BMPp
 >>>18	lelong		x		\b, %d x
 >>>22	lelong		x		%d
 diff -u magic.orig/Magdir/rpm magic/Magdir/rpm
 --- magic.orig/Magdir/rpm	2019-02-22 14:06:34.000000000 +0100
 +++ magic/Magdir/rpm	2020-07-05 19:38:02.720419674 +0200
@@ -29,6 +29,7 @@
 >>8	beshort		17		SuperH
 >>8	beshort		18		Xtensa
@ -102,9 +25,9 @@ diff -u magic.orig/Magdir/rpm magic/Magdir/rpm
 #delta RPM    Daniel Novotny (dnovotny@redhat.com)
 0	string		drpm		Delta RPM
-diff -u magic.orig/Magdir/securitycerts magic/Magdir/securitycerts
+diff -ur Magdir.orig/securitycerts Magdir/securitycerts
--- magic.orig/Magdir/securitycerts	2019-02-22 14:06:34.000000000 +0100
+--- Magdir.orig/securitycerts	2021-02-23 01:49:24.000000000 +0100
-+++ magic/Magdir/securitycerts	2020-07-05 19:38:02.720419674 +0200
+++ Magdir/securitycerts	2021-04-05 19:40:55.080911893 +0200
@@ -4,3 +4,5 @@
 0	search/1		-----BEGIN\ CERTIFICATE------	RFC1421 Security Certificate text
 0	search/1		-----BEGIN\ NEW\ CERTIFICATE	RFC1421 Security Certificate Signing Request text
--- a/ext/fileinfo/tests/bug68819_001.phpt
+++ b/ext/fileinfo/tests/bug68819_001.phpt
@ -15,4 +15,4 @@ $type = $finfo->buffer($string);
 var_dump($type);
 ?>
 --EXPECT--
-string(60) "ASCII text, with very long lines, with CRLF line terminators"
+string(66) "ASCII text, with very long lines (617), with CRLF line terminators"
--- a/ext/fileinfo/tests/bug68819_002.phpt
+++ b/ext/fileinfo/tests/bug68819_002.phpt
@ -23,4 +23,4 @@ var_dump($type);
 ?>
 --EXPECT--
-string(60) "ASCII text, with very long lines, with CRLF line terminators"
+string(67) "ASCII text, with very long lines (8191), with CRLF line terminators"
--- a/ext/fileinfo/tests/finfo_file_basic.phpt
+++ b/ext/fileinfo/tests/finfo_file_basic.phpt
@ -24,5 +24,5 @@ try {
 *** Testing finfo_file() : basic functionality ***
 string(28) "text/x-php; charset=us-ascii"
 string(22) "PHP script, ASCII text"
-string(25) "text/plain; charset=utf-8"
+string(28) "text/plain; charset=us-ascii"
 finfo_file(): Argument #1 ($finfo) must not contain any null bytes
--- a/ext/fileinfo/tests/magic
+++ b/ext/fileinfo/tests/magic
--- a/ext/fileinfo/tests/magic私はガラスを食べられます
+++ b/ext/fileinfo/tests/magic私はガラスを食べられます