fileinfo: Port libmagic 5.40

Signed-off-by: Anatol Belski <ab@php.net>
2024-11-30 21:35:36 +08:00 · 2021-04-01 20:15:45 +02:00 · 2021-04-01 20:15:45 +02:00 · 3b9173dc8f
commit 3b9173dc8f
parent 22019a1edd
21 changed files with 174944 additions and 148884 deletions
--- a/ext/fileinfo/data_file.c
+++ b/ext/fileinfo/data_file.c
--- a/ext/fileinfo/libmagic.patch
+++ b/ext/fileinfo/libmagic.patch
--- a/ext/fileinfo/libmagic/apprentice.c
+++ b/ext/fileinfo/libmagic/apprentice.c
@ -34,11 +34,10 @@
 #include "file.h"

 #ifndef	lint
-FILE_RCSID("@(#)$File: apprentice.c,v 1.297 2020/05/09 18:57:15 christos Exp $")
+FILE_RCSID("@(#)$File: apprentice.c,v 1.301 2021/02/23 00:51:11 christos Exp $")
 #endif	/* lint */

 #include "magic.h"
-#include "patchlevel.h"
 #include <stdlib.h>

 #if defined(__hpux) && !defined(HAVE_STRTOULL)
@ -536,6 +535,7 @@ file_ms_alloc(int flags)
 	ms->elf_notes_max = FILE_ELF_NOTES_MAX;
 	ms->regex_max = FILE_REGEX_MAX;
 	ms->bytes_max = FILE_BYTES_MAX;
+	ms->encoding_max = FILE_ENCODING_MAX;
 	return ms;
 free:
 	efree(ms);
@ -1416,7 +1416,10 @@ apprentice_load(struct magic_set *ms, const char *fn, int action)
 		 */
 		set_last_default(ms, mset[j].me, mset[j].count);

-		/* coalesce per file arrays into a single one */
+		/* coalesce per file arrays into a single one, if needed */
+		if (mset[j].count == 0)
+			continue;
+		      
 		if (coalesce_entries(ms, mset[j].me, mset[j].count,
 		    &map->magic[j], &map->nmagic[j]) == -1) {
 			errs++;
@ -2086,6 +2089,13 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line,
 		return -1;
 	}

+	if (m->type == FILE_NAME && cont_level != 0) {
+		if (ms->flags & MAGIC_CHECK)
+			file_magwarn(ms, "`name%s' entries can only be "
+			    "declared at top level", l);
+		return -1;
+	}
+
 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
 	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */

@ -2699,7 +2709,7 @@ getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
 		ull = CAST(uint64_t, strtoull(*p, &ep, 0));
 		m->value.q = file_signextend(ms, m, ull);
 		if (*p == ep) {
-			file_magwarn(ms, "Unparseable number `%s'", *p);
+			file_magwarn(ms, "Unparsable number `%s'", *p);
 		} else {
 			size_t ts = typesize(m->type);
 			uint64_t x;
@ -3101,8 +3111,8 @@ internal_loaded:
 	else
 		version = ptr[1];
 	if (version != VERSIONNO) {
-		file_error(ms, 0, "File %d.%d supports only version %d magic "
-		    "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
+		file_error(ms, 0, "File %d supports only version %d magic "
+		    "files. `%s' is version %d", MAGIC_VERSION,
 		    VERSIONNO, dbname, version);
 		goto error;
 	}
--- a/ext/fileinfo/libmagic/ascmagic.c
+++ b/ext/fileinfo/libmagic/ascmagic.c
@ -35,7 +35,7 @@
 #include "file.h"

 #ifndef	lint
-FILE_RCSID("@(#)$File: ascmagic.c,v 1.107 2020/06/08 19:58:36 christos Exp $")
+FILE_RCSID("@(#)$File: ascmagic.c,v 1.109 2021/02/05 23:01:40 christos Exp $")
 #endif	/* lint */

 #include "magic.h"
@ -50,7 +50,8 @@ FILE_RCSID("@(#)$File: ascmagic.c,v 1.107 2020/06/08 19:58:36 christos Exp $")
 #define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
 		  || (x) == 0x85 || (x) == '\f')

-private unsigned char *encode_utf8(unsigned char *, size_t, unicodechar *, size_t);
+private unsigned char *encode_utf8(unsigned char *, size_t, file_unichar_t *,
+    size_t);
 private size_t trim_nuls(const unsigned char *, size_t);

 /*
@ -69,7 +70,7 @@ trim_nuls(const unsigned char *buf, size_t nbytes)
 protected int
 file_ascmagic(struct magic_set *ms, const struct buffer *b, int text)
 {
-	unicodechar *ubuf = NULL;
+	file_unichar_t *ubuf = NULL;
 	size_t ulen = 0;
 	int rv = 1;
 	struct buffer bb;
@ -101,9 +102,9 @@ file_ascmagic(struct magic_set *ms, const struct buffer *b, int text)
 }

 protected int
-file_ascmagic_with_encoding(struct magic_set *ms,
-    const struct buffer *b, unicodechar *ubuf, size_t ulen, const char *code,
-    const char *type, int text)
+file_ascmagic_with_encoding(struct magic_set *ms, const struct buffer *b,
+    file_unichar_t *ubuf, size_t ulen, const char *code, const char *type,
+    int text)
 {
 	struct buffer bb;
 	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
@ -127,7 +128,7 @@ file_ascmagic_with_encoding(struct magic_set *ms,
 	int executable = 0;

 	size_t last_line_end = CAST(size_t, -1);
-	int has_long_lines = 0;
+	size_t has_long_lines = 0;

 	nbytes = trim_nuls(buf, nbytes);

@ -190,8 +191,11 @@ file_ascmagic_with_encoding(struct magic_set *ms,
 		}

 		/* If this line is _longer_ than MAXLINELEN, remember it. */
-		if (i > last_line_end + MAXLINELEN)
-			has_long_lines = 1;
+		if (i > last_line_end + MAXLINELEN) {
+			size_t ll = i - last_line_end;
+			if (ll > has_long_lines)
+				has_long_lines = ll;
+		}

 		if (ubuf[i] == '\033')
 			has_escapes = 1;
@ -269,7 +273,8 @@ file_ascmagic_with_encoding(struct magic_set *ms,
 				goto done;

 		if (has_long_lines)
-			if (file_printf(ms, ", with very long lines") == -1)
+			if (file_printf(ms, ", with very long lines (%zu)",
+			    has_long_lines) == -1)
 				goto done;

 		/*
@ -281,7 +286,8 @@ file_ascmagic_with_encoding(struct magic_set *ms,
 			if (file_printf(ms, ", with") == -1)
 				goto done;

-			if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
+			if (n_crlf == 0 && n_cr == 0 &&
+			    n_nel == 0 && n_lf == 0) {
 				if (file_printf(ms, " no") == -1)
 					goto done;
 			} else {
@ -335,7 +341,7 @@ done:
 * after end of string, or NULL if an invalid character is found.
 */
 private unsigned char *
-encode_utf8(unsigned char *buf, size_t len, unicodechar *ubuf, size_t ulen)
+encode_utf8(unsigned char *buf, size_t len, file_unichar_t *ubuf, size_t ulen)
 {
 	size_t i;
 	unsigned char *end = buf + len;
@ -345,43 +351,45 @@ encode_utf8(unsigned char *buf, size_t len, unicodechar *ubuf, size_t ulen)
 			if (end - buf < 1)
 				return NULL;
 			*buf++ = CAST(unsigned char, ubuf[i]);
-		} else if (ubuf[i] <= 0x7ff) {
+			continue;
+		} 
+		if (ubuf[i] <= 0x7ff) {
 			if (end - buf < 2)
 				return NULL;
 			*buf++ = CAST(unsigned char, (ubuf[i] >> 6) + 0xc0);
-			*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
-		} else if (ubuf[i] <= 0xffff) {
+			goto out1;
+		}
+		if (ubuf[i] <= 0xffff) {
 			if (end - buf < 3)
 				return NULL;
 			*buf++ = CAST(unsigned char, (ubuf[i] >> 12) + 0xe0);
-			*buf++ = CAST(unsigned char, ((ubuf[i] >> 6) & 0x3f) + 0x80);
-			*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
-		} else if (ubuf[i] <= 0x1fffff) {
+			goto out2;
+		}
+		if (ubuf[i] <= 0x1fffff) {
 			if (end - buf < 4)
 				return NULL;
 			*buf++ = CAST(unsigned char, (ubuf[i] >> 18) + 0xf0);
-			*buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
-			*buf++ = CAST(unsigned char, ((ubuf[i] >>  6) & 0x3f) + 0x80);
-			*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
-		} else if (ubuf[i] <= 0x3ffffff) {
+			goto out3;
+		}
+		if (ubuf[i] <= 0x3ffffff) {
 			if (end - buf < 5)
 				return NULL;
 			*buf++ = CAST(unsigned char, (ubuf[i] >> 24) + 0xf8);
-			*buf++ = CAST(unsigned char, ((ubuf[i] >> 18) & 0x3f) + 0x80);
-			*buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
-			*buf++ = CAST(unsigned char, ((ubuf[i] >>  6) & 0x3f) + 0x80);
-			*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
-		} else if (ubuf[i] <= 0x7fffffff) {
+			goto out4;
+		} 
+		if (ubuf[i] <= 0x7fffffff) {
 			if (end - buf < 6)
 				return NULL;
 			*buf++ = CAST(unsigned char, (ubuf[i] >> 30) + 0xfc);
-			*buf++ = CAST(unsigned char, ((ubuf[i] >> 24) & 0x3f) + 0x80);
-			*buf++ = CAST(unsigned char, ((ubuf[i] >> 18) & 0x3f) + 0x80);
-			*buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
-			*buf++ = CAST(unsigned char, ((ubuf[i] >>  6) & 0x3f) + 0x80);
-			*buf++ = CAST(unsigned char, (ubuf[i] & 0x3f) + 0x80);
-		} else /* Invalid character */
-			return NULL;
+			goto out5;
+		} 
+		/* Invalid character */
+		return NULL;
+	out5:	*buf++ = CAST(unsigned char, ((ubuf[i] >> 24) & 0x3f) + 0x80);
+	out4:	*buf++ = CAST(unsigned char, ((ubuf[i] >> 18) & 0x3f) + 0x80);
+	out3:	*buf++ = CAST(unsigned char, ((ubuf[i] >> 12) & 0x3f) + 0x80);
+	out2:	*buf++ = CAST(unsigned char, ((ubuf[i] >>  6) & 0x3f) + 0x80);
+	out1:	*buf++ = CAST(unsigned char, ((ubuf[i] >>  0) & 0x3f) + 0x80);
 	}

 	return buf;
--- a/ext/fileinfo/libmagic/compress.c
+++ b/ext/fileinfo/libmagic/compress.c
@ -35,7 +35,7 @@
 #include "file.h"

 #ifndef lint
-FILE_RCSID("@(#)$File: compress.c,v 1.127 2020/05/31 00:11:06 christos Exp $")
+FILE_RCSID("@(#)$File: compress.c,v 1.129 2020/12/08 21:26:00 christos Exp $")
 #endif

 #include "magic.h"
@ -72,7 +72,7 @@ typedef void (*sig_t)(int);
 #include <bzlib.h>
 #endif

-#if defined(HAVE_XZLIB_H) && defined(XZLIBSUPPORT)
+#if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
 #define BUILTIN_XZLIB
 #include <lzma.h>
 #endif
@ -847,8 +847,23 @@ uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
 	for (i = 0; i < __arraycount(fdp); i++)
 		fdp[i][0] = fdp[i][1] = -1;

-	if ((fd == -1 && pipe(fdp[STDIN_FILENO]) == -1) ||
-	    pipe(fdp[STDOUT_FILENO]) == -1 || pipe(fdp[STDERR_FILENO]) == -1) {
+	/*
+	 * There are multithreaded users who run magic_file()
+	 * from dozens of threads. If two parallel magic_file() calls
+	 * analyze two large compressed files, both will spawn
+	 * an uncompressing child here, which writes out uncompressed data.
+	 * We read some portion, then close the pipe, then waitpid() the child.
+	 * If uncompressed data is larger, child should get EPIPE and exit.
+	 * However, with *parallel* calls OTHER child may unintentionally
+	 * inherit pipe fds, thus keeping pipe open and making writes in
+	 * our child block instead of failing with EPIPE!
+	 * (For the bug to occur, two threads must mutually inherit their pipes,
+	 * and both must have large outputs. Thus it happens not that often).
+	 * To avoid this, be sure to create pipes with O_CLOEXEC.
+	 */
+	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
+	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
+	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
 		closep(fdp[STDIN_FILENO]);
 		closep(fdp[STDOUT_FILENO]);
 		return makeerror(newch, n, "Cannot create pipe, %s",
@ -879,16 +894,20 @@ uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
 			if (fdp[STDIN_FILENO][1] > 2)
 				(void) close(fdp[STDIN_FILENO][1]);
 		}
+		file_clear_closexec(STDIN_FILENO);
+
 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
 		if (copydesc(STDOUT_FILENO, fdp[STDOUT_FILENO][1]))
 			(void) close(fdp[STDOUT_FILENO][1]);
 		if (fdp[STDOUT_FILENO][0] > 2)
 			(void) close(fdp[STDOUT_FILENO][0]);
+		file_clear_closexec(STDOUT_FILENO);

 		if (copydesc(STDERR_FILENO, fdp[STDERR_FILENO][1]))
 			(void) close(fdp[STDERR_FILENO][1]);
 		if (fdp[STDERR_FILENO][0] > 2)
 			(void) close(fdp[STDERR_FILENO][0]);
+		file_clear_closexec(STDERR_FILENO);

 		(void)execvp(compr[method].argv[0],
 		    RCAST(char *const *, RCAST(intptr_t, compr[method].argv)));
--- a/ext/fileinfo/libmagic/config.h
+++ b/ext/fileinfo/libmagic/config.h
@ -1 +0,0 @@
-#include "php.h"
--- a/ext/fileinfo/libmagic/der.c
+++ b/ext/fileinfo/libmagic/der.c
@ -35,7 +35,7 @@
 #include "file.h"

 #ifndef lint
-FILE_RCSID("@(#)$File: der.c,v 1.20 2020/06/07 19:10:37 christos Exp $")
+FILE_RCSID("@(#)$File: der.c,v 1.21 2020/06/15 00:58:10 christos Exp $")
 #endif
 #else
 #define SIZE_T_FORMAT "z"
@ -249,7 +249,6 @@ der_data(char *buf, size_t blen, uint32_t tag, const void *q, uint32_t len)
 		return snprintf(buf, blen,
 		    "20%c%c-%c%c-%c%c %c%c:%c%c:%c%c GMT", d[0], d[1], d[2],
 		    d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11]);
-		break;
 	default:
 		break;
 	}
--- a/ext/fileinfo/libmagic/encoding.c
+++ b/ext/fileinfo/libmagic/encoding.c
@ -35,7 +35,7 @@
 #include "file.h"

 #ifndef	lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.21 2019/06/08 20:49:14 christos Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.27 2021/02/05 21:33:49 christos Exp $")
 #endif	/* lint */

 #include "magic.h"
@ -43,14 +43,20 @@ FILE_RCSID("@(#)$File: encoding.c,v 1.21 2019/06/08 20:49:14 christos Exp $")
 #include <stdlib.h>


-private int looks_ascii(const unsigned char *, size_t, unicodechar *, size_t *);
-private int looks_utf8_with_BOM(const unsigned char *, size_t, unicodechar *,
+private int looks_ascii(const unsigned char *, size_t, file_unichar_t *,
+    size_t *);
+private int looks_utf8_with_BOM(const unsigned char *, size_t, file_unichar_t *,
+    size_t *);
+private int looks_utf7(const unsigned char *, size_t, file_unichar_t *,
+    size_t *);
+private int looks_ucs16(const unsigned char *, size_t, file_unichar_t *,
+    size_t *);
+private int looks_ucs32(const unsigned char *, size_t, file_unichar_t *,
+    size_t *);
+private int looks_latin1(const unsigned char *, size_t, file_unichar_t *,
+    size_t *);
+private int looks_extended(const unsigned char *, size_t, file_unichar_t *,
    size_t *);
-private int looks_utf7(const unsigned char *, size_t, unicodechar *, size_t *);
-private int looks_ucs16(const unsigned char *, size_t, unicodechar *, size_t *);
-private int looks_ucs32(const unsigned char *, size_t, unicodechar *, size_t *);
-private int looks_latin1(const unsigned char *, size_t, unicodechar *, size_t *);
-private int looks_extended(const unsigned char *, size_t, unicodechar *, size_t *);
 private void from_ebcdic(const unsigned char *, size_t, unsigned char *);

 #ifdef DEBUG_ENCODING
@ -62,19 +68,20 @@ private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
 /*
 * Try to determine whether text is in some character code we can
 * identify.  Each of these tests, if it succeeds, will leave
- * the text converted into one-unicodechar-per-character Unicode in
+ * the text converted into one-file_unichar_t-per-character Unicode in
 * ubuf, and the number of characters converted in ulen.
 */
 protected int
-file_encoding(struct magic_set *ms, const struct buffer *b, unicodechar **ubuf,
-    size_t *ulen, const char **code, const char **code_mime, const char **type)
+file_encoding(struct magic_set *ms, const struct buffer *b,
+    file_unichar_t **ubuf, size_t *ulen, const char **code,
+    const char **code_mime, const char **type)
 {
 	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
 	size_t nbytes = b->flen;
 	size_t mlen;
 	int rv = 1, ucs_type;
 	unsigned char *nbuf = NULL;
-	unicodechar *udefbuf;
+	file_unichar_t *udefbuf;
 	size_t udeflen;

 	if (ubuf == NULL)
@ -87,8 +94,12 @@ file_encoding(struct magic_set *ms, const struct buffer *b, unicodechar **ubuf,
 	*code = "unknown";
 	*code_mime = "binary";

+	if (nbytes > ms->encoding_max)
+		nbytes = ms->encoding_max;
+
 	mlen = (nbytes + 1) * sizeof((*ubuf)[0]);
-	if ((*ubuf = CAST(unicodechar *, ecalloc(CAST(size_t, 1), mlen))) == NULL) {
+	*ubuf = CAST(file_unichar_t *, ecalloc(CAST(size_t, 1), mlen));
+	if (*ubuf == NULL) {
 		file_oomem(ms, mlen);
 		goto done;
 	}
@ -102,7 +113,7 @@ file_encoding(struct magic_set *ms, const struct buffer *b, unicodechar **ubuf,
 	if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
 		if (looks_utf7(buf, nbytes, *ubuf, ulen) > 0) {
 			DPRINTF(("utf-7 %" SIZE_T_FORMAT "u\n", *ulen));
-			*code = "UTF-7 Unicode";
+			*code = "Unicode text, UTF-7";
 			*code_mime = "utf-7";
 		} else {
 			DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
@ -111,27 +122,27 @@ file_encoding(struct magic_set *ms, const struct buffer *b, unicodechar **ubuf,
 		}
 	} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
 		DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen));
-		*code = "UTF-8 Unicode (with BOM)";
+		*code = "Unicode text, UTF-8 (with BOM)";
 		*code_mime = "utf-8";
 	} else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
 		DPRINTF(("utf8 %" SIZE_T_FORMAT "u\n", *ulen));
-		*code = "UTF-8 Unicode";
+		*code = "Unicode text, UTF-8";
 		*code_mime = "utf-8";
 	} else if ((ucs_type = looks_ucs32(buf, nbytes, *ubuf, ulen)) != 0) {
 		if (ucs_type == 1) {
-			*code = "Little-endian UTF-32 Unicode";
+			*code = "Unicode text, UTF-32, little-endian";
 			*code_mime = "utf-32le";
 		} else {
-			*code = "Big-endian UTF-32 Unicode";
+			*code = "Unicode text, UTF-32, big-endian";
 			*code_mime = "utf-32be";
 		}
 		DPRINTF(("ucs32 %" SIZE_T_FORMAT "u\n", *ulen));
 	} else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
 		if (ucs_type == 1) {
-			*code = "Little-endian UTF-16 Unicode";
+			*code = "Unicode text, UTF-16, little-endian";
 			*code_mime = "utf-16le";
 		} else {
-			*code = "Big-endian UTF-16 Unicode";
+			*code = "Unicode text, UTF-16, big-endian";
 			*code_mime = "utf-16be";
 		}
 		DPRINTF(("ucs16 %" SIZE_T_FORMAT "u\n", *ulen));
@ -249,64 +260,40 @@ private char text_chars[256] = {
 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
 };

-private int
-looks_ascii(const unsigned char *buf, size_t nbytes, unicodechar *ubuf,
-    size_t *ulen)
-{
-	size_t i;
-
-	*ulen = 0;
-
-	for (i = 0; i < nbytes; i++) {
-		int t = text_chars[buf[i]];
-
-		if (t != T)
-			return 0;
-
-		ubuf[(*ulen)++] = buf[i];
-	}
-
-	return 1;
+#define LOOKS(NAME, COND) /* emits looks_NAME(); a byte is rejected if COND */ \
+private int \
+looks_ ## NAME(const unsigned char *buf, size_t nbytes, file_unichar_t *ubuf, \
+    size_t *ulen) \
+{ \
+	size_t i, u; \
+	size_t dist[256]; /* per-byte counts; size_t so they cannot wrap */ \
+	memset(dist, 0, sizeof(dist)); \
+\
+	*ulen = 0; \
+\
+	for (i = 0; i < nbytes; i++) { \
+		int t = text_chars[buf[i]]; \
+\
+		if (COND) /* first unacceptable byte: not this encoding */ \
+			return 0; \
+\
+		ubuf[(*ulen)++] = buf[i]; /* copy the byte as one character */ \
+		dist[buf[i]]++; \
+	} \
+	u = 0; \
+	for (i = 0; i < __arraycount(dist); i++) { \
+		if (dist[i]) \
+			u += dist[i]; \
+	} \
+	if (u < 3) /* u is the total byte count; fewer than 3 => return 0 */ \
+		return 0; \
+\
+	return 1; \
 }

-private int
-looks_latin1(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size_t *ulen)
-{
-	size_t i;
-
-	*ulen = 0;
-
-	for (i = 0; i < nbytes; i++) {
-		int t = text_chars[buf[i]];
-
-		if (t != T && t != I)
-			return 0;
-
-		ubuf[(*ulen)++] = buf[i];
-	}
-
-	return 1;
-}
-
-private int
-looks_extended(const unsigned char *buf, size_t nbytes, unicodechar *ubuf,
-    size_t *ulen)
-{
-	size_t i;
-
-	*ulen = 0;
-
-	for (i = 0; i < nbytes; i++) {
-		int t = text_chars[buf[i]];
-
-		if (t != T && t != I && t != X)
-			return 0;
-
-		ubuf[(*ulen)++] = buf[i];
-	}
-
-	return 1;
-}
+LOOKS(ascii, t != T)	/* looks_ascii(): every byte's text_chars class must be T */
+LOOKS(latin1, t != T && t != I)	/* looks_latin1(): classes T or I only */
+LOOKS(extended, t != T && t != I && t != X)	/* looks_extended(): T, I or X */

 /*
 * Decide whether some text looks like UTF-8. Returns:
@ -319,12 +306,65 @@ looks_extended(const unsigned char *buf, size_t nbytes, unicodechar *ubuf,
 * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen;
 * ubuf must be big enough!
 */
+
+// from: https://golang.org/src/unicode/utf8/utf8.go
+
+#define	XX 0xF1 // invalid: size 1
+#define	AS 0xF0 // ASCII: size 1
+#define	S1 0x02 // accept 0, size 2
+#define	S2 0x13 // accept 1, size 3
+#define	S3 0x03 // accept 0, size 3
+#define	S4 0x23 // accept 2, size 3
+#define	S5 0x34 // accept 3, size 4
+#define	S6 0x04 // accept 0, size 4
+#define	S7 0x44 // accept 4, size 4
+
+#define LOCB 0x80
+#define HICB 0xBF
+
+// first[b] classifies lead byte b: low nibble = sequence size, high nibble = accept_ranges index.
+static const uint8_t first[] = {
+    //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
+    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x00-0x0F
+    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x10-0x1F
+    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x20-0x2F
+    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x30-0x3F
+    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x40-0x4F
+    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x50-0x5F
+    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x60-0x6F
+    AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x70-0x7F
+    //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
+    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x80-0x8F
+    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x90-0x9F
+    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xA0-0xAF
+    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xB0-0xBF
+    XX, XX, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, // 0xC0-0xCF
+    S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, // 0xD0-0xDF
+    S2, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S4, S3, S3, // 0xE0-0xEF
+    S5, S6, S6, S6, S7, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xF0-0xFF
+};
+
+// accept_ranges[k] gives the inclusive range of valid values for the second
+// byte of a UTF-8 sequence; read-only and private to this file.
+static const struct accept_range {
+	uint8_t lo; // lowest value for second byte.
+	uint8_t hi; // highest value for second byte.
+} accept_ranges[16] = {
+// Sized 16 so any 4-bit index is in bounds; unlisted entries are zero-filled.
+	{ LOCB, HICB },
+	{ 0xA0, HICB },
+	{ LOCB, 0x9F },
+	{ 0x90, HICB },
+	{ LOCB, 0x8F },
+};
+
 protected int
-file_looks_utf8(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size_t *ulen)
+file_looks_utf8(const unsigned char *buf, size_t nbytes, file_unichar_t *ubuf,
+    size_t *ulen)
 {
 	size_t i;
 	int n;
-	unicodechar c;
+	file_unichar_t c;
 	int gotone = 0, ctrl = 0;

 	if (ubuf)
@ -346,6 +386,10 @@ file_looks_utf8(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size
 			return -1;
 		} else {			   /* 11xxxxxx begins UTF-8 */
 			int following;
+			uint8_t x = first[buf[i]];
+			const struct accept_range *ar = &accept_ranges[x >> 4];
+			if (x == XX)
+				return -1;

 			if ((buf[i] & 0x20) == 0) {		/* 110xxxxx */
 				c = buf[i] & 0x1f;
@ -370,6 +414,10 @@ file_looks_utf8(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size
 				if (i >= nbytes)
 					goto done;

+				if (n == 0 &&
+				     (buf[i] < ar->lo || buf[i] > ar->hi))
+					return -1;
+
 				if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40))
 					return -1;

@ -391,8 +439,8 @@ done:
 * rest of the text.
 */
 private int
-looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unicodechar *ubuf,
-    size_t *ulen)
+looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes,
+    file_unichar_t *ubuf, size_t *ulen)
 {
 	if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf)
 		return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
@ -401,7 +449,8 @@ looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unicodechar *ubuf,
 }

 private int
-looks_utf7(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size_t *ulen)
+looks_utf7(const unsigned char *buf, size_t nbytes, file_unichar_t *ubuf,
+    size_t *ulen)
 {
 	if (nbytes > 4 && buf[0] == '+' && buf[1] == '/' && buf[2] == 'v')
 		switch (buf[3]) {
@ -420,7 +469,7 @@ looks_utf7(const unsigned char *buf, size_t nbytes, unicodechar *ubuf, size_t *u
 }

 private int
-looks_ucs16(const unsigned char *bf, size_t nbytes, unicodechar *ubf,
+looks_ucs16(const unsigned char *bf, size_t nbytes, file_unichar_t *ubf,
    size_t *ulen)
 {
 	int bigend;
@ -443,10 +492,10 @@ looks_ucs16(const unsigned char *bf, size_t nbytes, unicodechar *ubf,

 		if (bigend)
 			ubf[(*ulen)++] = bf[i + 1]
-			    | (CAST(unicodechar, bf[i]) << 8);
+			    | (CAST(file_unichar_t, bf[i]) << 8);
 		else
 			ubf[(*ulen)++] = bf[i]
-			    | (CAST(unicodechar, bf[i + 1]) << 8);
+			    | (CAST(file_unichar_t, bf[i + 1]) << 8);

 		if (ubf[*ulen - 1] == 0xfffe)
 			return 0;
@ -459,7 +508,7 @@ looks_ucs16(const unsigned char *bf, size_t nbytes, unicodechar *ubf,
 }

 private int
-looks_ucs32(const unsigned char *bf, size_t nbytes, unicodechar *ubf,
+looks_ucs32(const unsigned char *bf, size_t nbytes, file_unichar_t *ubf,
    size_t *ulen)
 {
 	int bigend;
@ -481,15 +530,15 @@ looks_ucs32(const unsigned char *bf, size_t nbytes, unicodechar *ubf,
 		/* XXX fix to properly handle chars > 65536 */

 		if (bigend)
-			ubf[(*ulen)++] = CAST(unicodechar, bf[i + 3])
-			    | (CAST(unicodechar, bf[i + 2]) << 8)
-			    | (CAST(unicodechar, bf[i + 1]) << 16)
-			    | (CAST(unicodechar, bf[i]) << 24);
+			ubf[(*ulen)++] = CAST(file_unichar_t, bf[i + 3])
+			    | (CAST(file_unichar_t, bf[i + 2]) << 8)
+			    | (CAST(file_unichar_t, bf[i + 1]) << 16)
+			    | (CAST(file_unichar_t, bf[i]) << 24);
 		else
-			ubf[(*ulen)++] = CAST(unicodechar, bf[i + 0])
-			    | (CAST(unicodechar, bf[i + 1]) << 8) 
-			    | (CAST(unicodechar, bf[i + 2]) << 16)
-			    | (CAST(unicodechar, bf[i + 3]) << 24);
+			ubf[(*ulen)++] = CAST(file_unichar_t, bf[i + 0])
+			    | (CAST(file_unichar_t, bf[i + 1]) << 8) 
+			    | (CAST(file_unichar_t, bf[i + 2]) << 16)
+			    | (CAST(file_unichar_t, bf[i + 3]) << 24);

 		if (ubf[*ulen - 1] == 0xfffe)
 			return 0;
--- a/ext/fileinfo/libmagic/file.h
+++ b/ext/fileinfo/libmagic/file.h
@ -27,7 +27,7 @@
 */
 /*
 * file.h - definitions for file(1) program
- * @(#)$File: file.h,v 1.220 2020/06/08 17:38:27 christos Exp $
+ * @(#)$File: file.h,v 1.225 2021/02/05 22:29:07 christos Exp $
 */

 #ifndef __file_h__
@ -35,6 +35,7 @@

 #include "config.h"

+#include "php.h"
 #include "ext/standard/php_string.h"
 #include "ext/pcre/php_pcre.h"

@ -136,6 +137,14 @@
 #define	MAX(a,b)	(((a) > (b)) ? (a) : (b))
 #endif

+#ifndef O_CLOEXEC
+# define O_CLOEXEC 0
+#endif
+
+#ifndef FD_CLOEXEC
+# define FD_CLOEXEC 1
+#endif
+
 #define FILE_BADSIZE CAST(size_t, ~0ul)
 #define MAXDESC	64		/* max len of text description/MIME type */
 #define MAXMIME	80		/* max len of text MIME type */
@ -403,14 +412,16 @@ struct level_info {
 #endif
 };

+struct cont {	/* type of magic_set.c, declared at file scope */
+	size_t len;	/* NOTE(review): presumably the entry count of li -- confirm */
+	struct level_info *li;	/* NOTE(review): presumably a level_info array -- confirm */
+};
+
 #define MAGIC_SETS	2

 struct magic_set {
 	struct mlist *mlist[MAGIC_SETS];	/* list of regular entries */
-	struct cont {
-		size_t len;
-		struct level_info *li;
-	} c;
+	struct cont c;
 	struct out {
 		char *buf;		/* Accumulation buffer */
 		size_t blen;		/* Length of buffer */
@ -445,6 +456,7 @@ struct magic_set {
 	uint16_t elf_notes_max;
 	uint16_t regex_max;
 	size_t bytes_max;		/* number of bytes to read from file */
+	size_t encoding_max;		/* bytes to look for encoding */
 #ifndef FILE_BYTES_MAX
 # define FILE_BYTES_MAX (1024 * 1024)	/* how much of the file to look at */
 #endif
@ -454,11 +466,13 @@ struct magic_set {
 #define	FILE_INDIR_MAX			50
 #define	FILE_NAME_MAX			50
 #define	FILE_REGEX_MAX			8192
+#define	FILE_ENCODING_MAX		(64 * 1024)
 };

 /* Type for Unicode characters */
-typedef unsigned long unicodechar;
+typedef unsigned long file_unichar_t;

+struct stat;
 #define FILE_T_LOCAL	1
 #define FILE_T_WINDOWS	2
 protected const char *file_fmttime(char *, size_t, uint64_t, int);
@ -468,6 +482,8 @@ protected int file_buffer(struct magic_set *, php_stream *, zend_stat_t *, const
    size_t);
 protected int file_fsmagic(struct magic_set *, const char *, zend_stat_t *);
 protected int file_pipe2file(struct magic_set *, int, const void *, size_t);
+protected int file_vprintf(struct magic_set *, const char *, va_list)
+    __attribute__((__format__(__printf__, 2, 0)));
 protected int file_separator(struct magic_set *);
 protected char *file_copystr(char *, size_t, size_t, const char *);
 protected int file_checkfmt(char *, size_t, const char *);
@ -486,15 +502,17 @@ protected int file_zmagic(struct magic_set *, const struct buffer *,
 protected int file_ascmagic(struct magic_set *, const struct buffer *,
    int);
 protected int file_ascmagic_with_encoding(struct magic_set *,
-    const struct buffer *, unicodechar *, size_t, const char *, const char *, int);
+    const struct buffer *, file_unichar_t *, size_t, const char *, const char *, int);
 protected int file_encoding(struct magic_set *, const struct buffer *,
-    unicodechar **, size_t *, const char **, const char **, const char **);
+    file_unichar_t **, size_t *, const char **, const char **, const char **);
 protected int file_is_json(struct magic_set *, const struct buffer *);
 protected int file_is_csv(struct magic_set *, const struct buffer *, int);
 protected int file_is_tar(struct magic_set *, const struct buffer *);
 protected int file_softmagic(struct magic_set *, const struct buffer *,
    uint16_t *, uint16_t *, int, int);
 protected int file_apprentice(struct magic_set *, const char *, int);
+protected int buffer_apprentice(struct magic_set *, struct magic **,
+    size_t *, size_t);
 protected int file_magicfind(struct magic_set *, const char *, struct mlist *);
 protected uint64_t file_signextend(struct magic_set *, struct magic *,
    uint64_t);
@ -510,7 +528,7 @@ protected size_t file_mbswidth(const char *);
 protected const char *file_getbuffer(struct magic_set *);
 protected ssize_t sread(int, void *, size_t, int);
 protected int file_check_mem(struct magic_set *, unsigned int);
-protected int file_looks_utf8(const unsigned char *, size_t, unicodechar *,
+protected int file_looks_utf8(const unsigned char *, size_t, file_unichar_t *,
    size_t *);
 protected size_t file_pstring_length_size(struct magic_set *,
    const struct magic *);
@ -521,6 +539,9 @@ protected char * file_printable(char *, size_t, const char *, size_t);
 protected int file_os2_apptype(struct magic_set *, const char *, const void *,
    size_t);
 #endif /* __EMX__ */
+protected int file_pipe_closexec(int *);
+protected int file_clear_closexec(int);
+protected char *file_strtrim(char *);

 protected void buffer_init(struct buffer *, int, const zend_stat_t *,
    const void *, size_t);
--- a/ext/fileinfo/libmagic/funcs.c
+++ b/ext/fileinfo/libmagic/funcs.c
@ -27,7 +27,7 @@
 #include "file.h"

 #ifndef	lint
-FILE_RCSID("@(#)$File: funcs.c,v 1.115 2020/02/20 15:50:20 christos Exp $")
+FILE_RCSID("@(#)$File: funcs.c,v 1.121 2021/02/05 22:29:07 christos Exp $")
 #endif	/* lint */

 #include "magic.h"
@ -36,6 +36,9 @@ FILE_RCSID("@(#)$File: funcs.c,v 1.115 2020/02/20 15:50:20 christos Exp $")
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>	/* for pipe2() */
+#endif
 #if defined(HAVE_WCHAR_H)
 #include <wchar.h>
 #endif
@ -100,7 +103,7 @@ file_checkfmt(char *msg, size_t mlen, const char *fmt)
 		if (*++p == '%')
 			continue;
 		// Skip uninteresting.
-		while (strchr("0.'+- ", *p) != NULL)
+		while (strchr("#0.'+- ", *p) != NULL)
 			p++;
 		if (*p == '*') {
 			if (msg)
@ -126,27 +129,56 @@ file_checkfmt(char *msg, size_t mlen, const char *fmt)
 	return 0;
 }

+/*
+ * Like vprintf, but appends to ms->o.buf: 0 on success/no-op, -1 on error.
+ */
+protected int
+file_vprintf(struct magic_set *ms, const char *fmt, va_list ap)
+{
+	size_t len;
+	char *buf, *newstr;
+	char tbuf[1024];
+
+	if (ms->event_flags & EVENT_HAD_ERR)	/* error already recorded: no-op */
+		return 0;
+
+	if (file_checkfmt(tbuf, sizeof(tbuf), fmt)) {
+		file_clearbuf(ms);
+		file_error(ms, 0, "Bad magic format `%s' (%s)", fmt, tbuf);
+		return -1;
+	}
+	/* Cap a single chunk at 1 KiB and the accumulated output at 1 MiB. */
+	len = vspprintf(&buf, 0, fmt, ap);
+	if (len > 1024 || len + ms->o.blen > 1024 * 1024) {
+		size_t blen = ms->o.blen;	/* read before file_clearbuf() runs */
+		if (buf) efree(buf);
+		file_clearbuf(ms);
+		file_error(ms, 0, "Output buffer space exceeded %zu+%zu", len,
+		    blen);	/* both arguments are size_t, hence %zu twice */
+		return -1;
+	}
+	/* Append: rebuild the buffer as previous contents + new chunk. */
+	if (ms->o.buf != NULL) {
+		len = spprintf(&newstr, 0, "%s%s", ms->o.buf, buf);
+		efree(buf);
+		efree(ms->o.buf);
+		buf = newstr;
+	}
+	ms->o.buf = buf;
+	ms->o.blen = len;
+	return 0;
+}
+
 protected int
 file_printf(struct magic_set *ms, const char *fmt, ...)
 {
+	int rv;
 	va_list ap;
-	char *buf = NULL, *newstr;

 	va_start(ap, fmt);
-	vspprintf(&buf, 0, fmt, ap);
+	rv = file_vprintf(ms, fmt, ap);
 	va_end(ap);
-
-	if (ms->o.buf != NULL) {
-		spprintf(&newstr, 0, "%s%s", ms->o.buf, (buf ? buf : ""));
-		if (buf) {
-			efree(buf);
-		}
-		efree(ms->o.buf);
-		ms->o.buf = newstr;
-	} else {
-		ms->o.buf = buf;
-	}
-	return 0;
+	return rv;
 }

 /*
@ -157,30 +189,18 @@ private void
 file_error_core(struct magic_set *ms, int error, const char *f, va_list va,
    size_t lineno)
 {
-	char *buf = NULL;
-
 	/* Only the first error is ok */
 	if (ms->event_flags & EVENT_HAD_ERR)
 		return;
 	if (lineno != 0) {
-		efree(ms->o.buf);
-		ms->o.buf = NULL;
-		file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno);
+		file_clearbuf(ms);
+		(void)file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno);
 	}
-
-	vspprintf(&buf, 0, f, va);
-	va_end(va);
-
-	if (error > 0) {
-		file_printf(ms, "%s (%s)", (*buf ? buf : ""), strerror(error));
-	} else if (*buf) {
-		file_printf(ms, "%s", buf);
-	}
-
-	if (buf) {
-		efree(buf);
-	}
-
+	if (ms->o.buf && *ms->o.buf)
+		(void)file_printf(ms, " ");
+	(void)file_vprintf(ms, f, va);
+	if (error > 0)
+		(void)file_printf(ms, " (%s)", strerror(error));
 	ms->event_flags |= EVENT_HAD_ERR;
 	ms->error = error;
 }
@ -228,11 +248,31 @@ file_badread(struct magic_set *ms)
 }

 #ifndef COMPILE_ONLY
+#define FILE_SEPARATOR "\n- "

 protected int
 file_separator(struct magic_set *ms)
 {
-	return file_printf(ms, "\n- ");
+	return file_printf(ms, FILE_SEPARATOR);
+}
+
+static void
+trim_separator(struct magic_set *ms)
+{
+	size_t l;
+
+	if (ms->o.buf == NULL)
+		return;
+
+	l = strlen(ms->o.buf);
+	if (l < sizeof(FILE_SEPARATOR))
+		return;
+
+	l -= sizeof(FILE_SEPARATOR) - 1;
+	if (strcmp(ms->o.buf + l, FILE_SEPARATOR) != 0)
+		return;
+
+	ms->o.buf[l] = '\0';
 }

 static int
@ -450,6 +490,7 @@ simple:
 				rv = -1;
 	}
 done:
+	trim_separator(ms);
 	if ((ms->flags & MAGIC_MIME_ENCODING) != 0) {
 		if (ms->flags & MAGIC_MIME_TYPE)
 			if (file_printf(ms, "; charset=") == -1)
@ -598,7 +639,7 @@ file_check_mem(struct magic_set *ms, unsigned int level)
 protected size_t
 file_printedlen(const struct magic_set *ms)
 {
-	return ms->o.buf == NULL ? 0 : strlen(ms->o.buf);
+	return ms->o.blen;
 }

 protected int
@ -717,7 +758,7 @@ struct guid {
 protected int
 file_parse_guid(const char *s, uint64_t *guid)
 {
-	struct guid *g = CAST(struct guid *, guid);
+	struct guid *g = CAST(struct guid *, CAST(void *, guid));
 	return sscanf(s,
 	    "%8x-%4hx-%4hx-%2hhx%2hhx-%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx",
 	    &g->data1, &g->data2, &g->data3, &g->data4[0], &g->data4[1],
@ -728,7 +769,8 @@ file_parse_guid(const char *s, uint64_t *guid)
 protected int
 file_print_guid(char *str, size_t len, const uint64_t *guid)
 {
-	const struct guid *g = CAST(const struct guid *, guid);
+	const struct guid *g = CAST(const struct guid *,
+	    CAST(const void *, guid));

 	return snprintf(str, len, "%.8X-%.4hX-%.4hX-%.2hhX%.2hhX-"
 	    "%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX",
@ -736,3 +778,39 @@ file_print_guid(char *str, size_t len, const uint64_t *guid)
 	    g->data4[2], g->data4[3], g->data4[4], g->data4[5],
 	    g->data4[6], g->data4[7]);
 }
+
+protected int
+file_pipe_closexec(int *fds)
+{
+#ifdef HAVE_PIPE2
+	return pipe2(fds, O_CLOEXEC);
+#else
+	if (pipe(fds) == -1)
+		return -1;
+	(void)fcntl(fds[0], F_SETFD, FD_CLOEXEC);
+	(void)fcntl(fds[1], F_SETFD, FD_CLOEXEC);
+	return 0;
+#endif
+}
+
+protected int
+file_clear_closexec(int fd) {
+	return fcntl(fd, F_SETFD, 0);
+}
+
+protected char *
+file_strtrim(char *str)
+{
+	char *last;
+
+	while (isspace(CAST(unsigned char, *str)))
+		str++;
+	last = str;
+	while (*last)
+		last++;
+	--last;
+	while (isspace(CAST(unsigned char, *last)))
+		last--;
+	*++last = '\0';
+	return str;
+}
--- a/ext/fileinfo/libmagic/is_csv.c
+++ b/ext/fileinfo/libmagic/is_csv.c
@ -32,7 +32,7 @@
 #include "file.h"

 #ifndef lint
-FILE_RCSID("@(#)$File: is_csv.c,v 1.4 2019/06/26 20:31:31 christos Exp $")
+FILE_RCSID("@(#)$File: is_csv.c,v 1.6 2020/08/09 16:43:36 christos Exp $")
 #endif

 #include <string.h>
@ -94,8 +94,7 @@ csv_parse(const unsigned char *uc, const unsigned char *ue)
 	size_t nf = 0, tf = 0, nl = 0;

 	while (uc < ue) {
-		unsigned char c;
-		switch (c = *uc++) {
+		switch (*uc++) {
 		case '"':
 			// Eat until the matching quote
 			uc = eatquote(uc, ue);
@ -150,7 +149,7 @@ file_is_csv(struct magic_set *ms, const struct buffer *b, int looks_text)
 		return 1;

 	if (mime) {
-		if (file_printf(ms, "application/csv") == -1)
+		if (file_printf(ms, "text/csv") == -1)
 			return -1;
 		return 1;
 	}
--- a/ext/fileinfo/libmagic/magic.c
+++ b/ext/fileinfo/libmagic/magic.c
@ -28,7 +28,7 @@
 #include "file.h"

 #ifndef	lint
-FILE_RCSID("@(#)$File: magic.c,v 1.112 2020/06/08 19:44:10 christos Exp $")
+FILE_RCSID("@(#)$File: magic.c,v 1.114 2021/02/05 21:33:49 christos Exp $")
 #endif	/* lint */

 #include "magic.h"
@ -348,6 +348,9 @@ magic_setparam(struct magic_set *ms, int param, const void *val)
 	case MAGIC_PARAM_BYTES_MAX:
 		ms->bytes_max = *CAST(const size_t *, val);
 		return 0;
+	case MAGIC_PARAM_ENCODING_MAX:
+		ms->encoding_max = *CAST(const size_t *, val);
+		return 0;
 	default:
 		errno = EINVAL;
 		return -1;
@ -381,6 +384,9 @@ magic_getparam(struct magic_set *ms, int param, void *val)
 	case MAGIC_PARAM_BYTES_MAX:
 		*CAST(size_t *, val) = ms->bytes_max;
 		return 0;
+	case MAGIC_PARAM_ENCODING_MAX:
+		*CAST(size_t *, val) = ms->encoding_max;
+		return 0;
 	default:
 		errno = EINVAL;
 		return -1;
--- a/ext/fileinfo/libmagic/magic.h
+++ b/ext/fileinfo/libmagic/magic.h
@ -113,7 +113,7 @@ b\31transp_compression\0\
 #define	MAGIC_NO_CHECK_FORTRAN	0x000000 /* Don't check ascii/fortran */
 #define	MAGIC_NO_CHECK_TROFF	0x000000 /* Don't check ascii/troff */

-#define MAGIC_VERSION		539	/* This implementation */
+#define MAGIC_VERSION		540	/* This implementation */


 #ifdef __cplusplus
@ -150,6 +150,7 @@ int magic_errno(magic_t);
 #define MAGIC_PARAM_ELF_NOTES_MAX	4
 #define MAGIC_PARAM_REGEX_MAX		5
 #define	MAGIC_PARAM_BYTES_MAX		6
+#define	MAGIC_PARAM_ENCODING_MAX	7

 int magic_setparam(magic_t, int, const void *);
 int magic_getparam(magic_t, int, void *);
--- a/ext/fileinfo/libmagic/patchlevel.h
+++ b/ext/fileinfo/libmagic/patchlevel.h
@ -1,390 +0,0 @@
-#define	FILE_VERSION_MAJOR	5
-#define	patchlevel		37
-
-/*
- * Patchlevel file for Ian Darwin's MAGIC command.
- * $File: patchlevel.h,v 1.68 2008/03/22 21:39:43 christos Exp $
- *
- * $Log$
- * Revision 2.1  2019/05/30 22:27:12 ab
- * Update libmagic to 5.37
- *
- * $Log$
- * Revision 2.1  2018/04/26 22:27:12 ab
- * Update libmagic to 5.33
- *
- * $Log$
- * Revision 2.0  2017/10/11 22:27:12 ab
- * Update libmagic to 5.31
- *
- * $Log$
- * Revision 1.9  2016/11/24 22:27:12 ab
- * Update libmagic to 5.29
- *
- * $Log$
- * Revision 1.9  2016/10/11 22:27:12 ab
- * Update libmagic to 5.28
- *
- * $Log$
- * Revision 1.9  2015/03/06 22:27:12 ab
- * Update libmagic to 5.2X
- *
- * $Log$
- * Revision 1.8  2014/02/18 22:27:12 ab
- * Update libmagic to 5.17
- *
- * $Log$
- * Revision 1.7  2013/03/26 22:27:12 ab
- * Update libmagic to 5.14
- *
- * $Log$
- * Revision 1.6  2012/03/26 21:01:37 ab
- * Update libmagic to 5.11
- *
- * Revision 1.5  2012/03/25 13:54:37  ab
- * Update libmagic to 5.04
- *
- * Revision 1.4  2009/05/04 20:52:43  scottmac
- * Update libmagic to 5.02
- *
- * Revision 1.3  2009/03/15 23:02:35  scottmac
- * Update fileinfo to libmagic 5.00 and remove dependency on dirent.h on Windows
- *
- * Revision 1.2  2008/11/02 16:09:27  scottmac
- * Update libmagic to 4.26 and add support for v6 of the magic file format.
- *
- * Revision 1.1  2008/07/11 14:13:50  derick
- * - Move lib to libmagic
- *
- * Revision 1.1  2008/07/11 14:10:50  derick
- * - Step one for bundling the libmagic library. Some config.m4 issues left.
- *
- * Revision 1.69  2008/07/02 15:27:05  christos
- * welcome to 4.25
- *
- * Revision 1.68  2008/03/22 21:39:43  christos
- * file 4.24
- *
- * Revision 1.67  2007/12/28 20:08:40  christos
- * welcome to 4.23.
- *
- * Revision 1.66  2007/12/27 16:38:24  christos
- * welcome to 4.22
- *
- * Revision 1.65  2007/05/24 17:22:27  christos
- * Welcome to 4.21
- *
- * Revision 1.64  2007/03/01 22:14:55  christos
- * welcome to 4.20
- *
- * Revision 1.63  2007/01/12 17:38:28  christos
- * Use File id.
- *
- * Revision 1.62  2006/12/11 21:49:58  christos
- * time for 4.19
- *
- * Revision 1.61  2006/10/31 21:18:09  christos
- * bump
- *
- * Revision 1.60  2006/03/02 22:15:12  christos
- * welcome to 4.17
- *
- * Revision 1.59  2005/10/17 17:15:21  christos
- * welcome to 4.16
- *
- * Revision 1.58  2005/08/18 15:52:56  christos
- * welcome to 4.15
- *
- * Revision 1.57  2005/06/25 15:52:14  christos
- * Welcome to 4.14
- *
- * Revision 1.56  2005/02/09 19:25:13  christos
- * Welcome to 4.13
- *
- * Revision 1.55  2004/11/24 18:57:47  christos
- * Re-do the autoconf stuff once more; passes make dist now.
- *
- * Revision 1.54  2004/11/21 05:52:05  christos
- * ready for 4.11
- *
- * Revision 1.53  2004/07/24 20:40:46  christos
- * welcome to 4.10
- *
- * Revision 1.52  2004/04/07 00:32:25  christos
- * welcome to 4.09
- *
- * Revision 1.51  2004/03/22 21:17:11  christos
- * welcome to 4.08.
- *
- * Revision 1.50  2003/12/23 17:34:04  christos
- * 4.07
- *
- * Revision 1.49  2003/10/15 02:08:27  christos
- * welcome to 4.06
- *
- * Revision 1.48  2003/09/12 19:41:14  christos
- * this is 4.04
- *
- * Revision 1.47  2003/05/23 21:38:21  christos
- * welcome to 4.03
- *
- * Revision 1.46  2003/04/02 18:57:43  christos
- * prepare for 4.02
- *
- * Revision 1.45  2003/03/26 15:37:25  christos
- * - Pass lint
- * - make NULL in magic_file mean stdin
- * - Fix "-" argument to file to pass NULL to magic_file
- * - avoid pointer casts by using memcpy
- * - rename magic_buf -> magic_buffer
- * - keep only the first error
- * - manual page: new sentence, new line
- * - fix typo in api function (magic_buf -> magic_buffer)
- *
- * Revision 1.44  2003/03/23 22:23:31  christos
- * finish librarification.
- *
- * Revision 1.43  2003/03/23 21:16:26  christos
- * update copyrights.
- *
- * Revision 1.42  2003/03/23 04:06:05  christos
- * Library re-organization
- *
- * Revision 1.41  2003/02/27 20:53:45  christos
- * - fix memory allocation problem (Jeff Johnson)
- * - fix stack overflow corruption (David Endler)
- * - fixes from NetBSD source (Antti Kantee)
- * - magic fixes
- *
- * Revision 1.40  2003/02/08 18:33:53  christos
- * - detect inttypes.h too (Dave Love <d.love@dl.ac.uk>)
- * - eliminate unsigned char warnings (Petter Reinholdtsen <pere@hungry.com>)
- * - better elf PT_NOTE handling (Nalin Dahyabhai <nalin@redhat.com>)
- * - add options to format the output differently
- * - much more magic.
- *
- * Revision 1.39  2002/07/03 18:57:52  christos
- * - ansify/c99ize
- * - more magic
- * - better COMPILE_ONLY support.
- * - new magic files.
- * - fix solaris compilation problems.
- *
- * Revision 1.38  2002/05/16 18:45:56  christos
- * - pt_note elf additions from NetBSD
- * - EMX os specific changes (Alexander Mai)
- * - stdint.h detection, acconfig.h fixes (Maciej W. Rozycki, Franz Korntner)
- * - regex file additions (Kim Cromie)
- * - getopt_long support and misc cleanups (Michael Piefel)
- * - many magic fixes and additions
- *
- * Revision 1.37  2001/09/03 14:44:22  christos
- * daylight/tm_isdst detection
- * magic fixes
- * don't eat the whole file if it has only nulls
- *
- * Revision 1.36  2001/07/22 21:04:15  christos
- * - magic fixes
- * - add new operators, pascal strings, UTC date printing, $HOME/.magic
- *   [from "Tom N Harris" <telliamed@mac.com>]
- *
- * Revision 1.35  2001/04/24 14:40:25  christos
- * - rename magic file sgi to mips and fix it
- * - add support for building magic.mgc
- * - portability fixes for mmap()
- * - try gzip before uncompress, because uncompress sometimes hangs
- * - be more conservative about pipe reads and writes
- * - many magic fixes
- *
- * Revision 1.34  2001/03/12 05:05:57  christos
- * - new compiled magic format
- * - lots of magic additions
- *
- * Revision 1.33  2000/11/13 00:30:50  christos
- * - wordperfect magic fix: freebsd pr 9388
- * - more msdos fixes from freebsd pr's 20131 and 20812
- * - sas and spss magic [Bruce Foster]
- * - mkinstalldirs [John Fremlin]
- * - sgi opengl fixes [Michael Pruett]
- * - netbsd magic fixes [Ignatios Souvatzis]
- * - audio additions [Michael Pruett]
- * - fix problem with non ansi RCSID [Andreas Ley]
- * - oggs magic [Felix von Leitner]
- * - gmon magic [Eugen Dedu]
- * - TNEF magic [Joomy]
- * - netpbm magic and misc other image stuff [Bryan Henderson]
- *
- * Revision 1.32  2000/08/05 18:24:18  christos
- * Correct indianness detection in elf (Charles Hannum)
- * FreeBSD elf core support (Guy Harris)
- * Use gzip in systems that don't have uncompress (Anthon van der Neut)
- * Internationalization/EBCDIC support (Eric Fisher)
- * Many many magic changes
- *
- * Revision 1.31  2000/05/14 17:58:36  christos
- * - new magic for claris files
- * - new magic for mathematica and maple files
- * - new magic for msvc files
- * - new -k flag to keep going matching all possible entries
- * - add the word executable on #! magic files, and fix the usage of
- *   the word script
- * - lots of other magic fixes
- * - fix typo test -> text
- *
- * Revision 1.30  2000/04/11 02:41:17  christos
- * - add support for mime output (-i)
- * - make sure we free memory in case realloc fails
- * - magic fixes
- *
- * Revision 1.29  1999/11/28 20:02:29  christos
- * new string/[Bcb] magic from anthon, and adjustments to the magic files to
- * use it.
- *
- * Revision 1.28  1999/10/31 22:11:48  christos
- * - add "char" type for compatibility with HP/UX
- * - recognize HP/UX syntax &=n etc.
- * - include errno.h for CYGWIN
- * - conditionalize the S_IS* macros
- * - revert the SHT_DYNSYM test that broke the linux stripped binaries test
- * - lots of Magdir changes
- *
- * Revision 1.27  1999/02/14 17:21:41  christos
- * Automake support and misc cleanups from Rainer Orth
- * Enable reading character and block special files from Dale R. Worley
- *
- * Revision 1.26  1998/09/12 13:19:39  christos
- * - add support for bi-endian indirect offsets (Richard Verhoeven)
- * - add recognition for bcpl (Joseph Myers)
- * - remove non magic files from Magdir to avoid difficulties building
- *   on os2 where files are case independent
- * - magic fixes.
- *
- * Revision 1.25  1998/06/27 14:04:04  christos
- * OLF patch Guy Harris
- * Recognize java/html (debian linux)
- * Const poisoning (debian linux)
- * More magic!
- *
- * Revision 1.24  1998/02/15 23:20:38  christos
- * Autoconf patch: Felix von Leitner <leitner@math.fu-berlin.de>
- * More magic fixes
- * Elf64 fixes
- *
- * Revision 1.23  1997/11/05 16:03:37  christos
- * - correct elf prps offset for SunOS-2.5.1 [guy@netapp.com]
- * - handle 64 bit time_t's correctly [ewt@redhat.com]
- * - new mime style magic [clarosse@netvista.net]
- * - new TI calculator magic [rmcguire@freenet.columbus.oh.us]
- * - new figlet fonts [obrien@freebsd.org]
- * - new cisco magic, and elf fixes [jhawk@bbnplanet.com]
- * - -b flag addition, and x86 filesystem magic [vax@linkhead.paranoia.com]
- * - s/Mpeg/MPEG, header and elf typo fixes [guy@netapp.com]
- * - Windows/NT registry files, audio code [guy@netapp.com]
- * - libGrx graphics lib fonts [guy@netapp.com]
- * - PNG fixes [guy@netapp.com]
- * - more m$ document magic [guy@netapp.com]
- * - PPD files [guy@netapp.com]
- * - archive magic cleanup [guy@netapp.com]
- * - linux kernel magic cleanup [guy@netapp.com]
- * - lecter magic [guy@netapp.com]
- * - vgetty magic [guy@netapp.com]
- * - sniffer additions [guy@netapp.com]
- *
- * Revision 1.22  1997/01/15 17:23:24  christos
- * - add support for elf core files: find the program name under SVR4 [Ken Pizzini]
- * - print strings only up to the first carriage return [various]
- * - freebsd international ascii support [J Wunsch]
- * - magic fixes and additions [Guy Harris]
- * - 64 bit fixes [Larry Schwimmer]
- * - support for both utime and utimes, but don't restore file access times
- *   by default [various]
- * - \xXX only takes 2 hex digits, not 3.
- * - re-implement support for core files [Guy Harris]
- *
- * Revision 1.21  1996/10/05 18:15:29  christos
- * Segregate elf stuff and conditionally enable it with -DBUILTIN_ELF
- * More magic fixes
- *
- * Revision 1.20  1996/06/22  22:15:52  christos
- * - support relative offsets of the form >&
- * - fix bug with truncating magic strings that contain \n
- * - file -f - did not read from stdin as documented
- * - support elf file parsing using our own elf support.
- * - as always magdir fixes and additions.
- *
- * Revision 1.19  1995/10/27  23:14:46  christos
- * Ability to parse colon separated list of magic files
- * New LEGAL.NOTICE
- * Various magic file changes
- *
- * Revision 1.18  1995/05/20  22:09:21  christos
- * Passed incorrect argument to eatsize().
- * Use %ld and %lx where appropriate.
- * Remove unused variables
- * ELF support for both big and little endian
- * Fixes for small files again.
- *
- * Revision 1.17  1995/04/28  17:29:13  christos
- * - Incorrect nroff detection fix from der Mouse
- * - Lost and incorrect magic entries.
- * - Added ELF stripped binary detection [in C; ugh]
- * - Look for $MAGIC to find the magic file.
- * - Eat trailing size specifications from numbers i.e. ignore 10L
- * - More fixes for very short files
- *
- * Revision 1.16  1995/03/25  22:06:45  christos
- * - use strtoul() where it exists.
- * - fix sign-extend bug
- * - try to detect tar archives before nroff files, otherwise
- *   tar files where the first file starts with a . will not work
- *
- * Revision 1.15  1995/01/21  21:03:35  christos
- * Added CSECTION for the file man page
- * Added version flag -v
- * Fixed bug with -f input flag (from iorio@violet.berkeley.edu)
- * Lots of magic fixes and reorganization...
- *
- * Revision 1.14  1994/05/03  17:58:23  christos
- * changes from mycroft@gnu.ai.mit.edu (Charles Hannum) for unsigned
- *
- * Revision 1.13  1994/01/21  01:27:01  christos
- * Fixed null termination bug from Don Seeley at BSDI in ascmagic.c
- *
- * Revision 1.12  1993/10/27  20:59:05  christos
- * Changed -z flag to understand gzip format too.
- * Moved builtin compression detection to a table, and move
- * the compress magic entry out of the source.
- * Made printing of numbers unsigned, and added the mask to it.
- * Changed the buffer size to 8k, because gzip will refuse to
- * unzip just a few bytes.
- *
- * Revision 1.11  1993/09/24  18:49:06  christos
- * Fixed small bug in softmagic.c introduced by
- * copying the data to be examined out of the input
- * buffer. Changed the Makefile to use sed to create
- * the correct man pages.
- *
- * Revision 1.10  1993/09/23  21:56:23  christos
- * Passed purify. Fixed indirections. Fixed byte order printing.
- * Fixed segmentation faults caused by referencing past the end
- * of the magic buffer. Fixed bus errors caused by referencing
- * unaligned shorts or longs.
- *
- * Revision 1.9  1993/03/24  14:23:40  ian
- * Batch of minor changes from several contributors.
- *
- * Revision 1.8  93/02/19  15:01:26  ian
- * Numerous changes from Guy Harris too numerous to mention but including
- * byte-order independance, fixing "old-style masking", etc. etc. A bugfix
- * for broken symlinks from martin@@d255s004.zfe.siemens.de.
- *
- * Revision 1.7  93/01/05  14:57:27  ian
- * Couple of nits picked by Christos (again, thanks).
- *
- * Revision 1.6  93/01/05  13:51:09  ian
- * Lotsa work on the Magic directory.
- *
- * Revision 1.5  92/09/14  14:54:51  ian
- * Fix a tiny null-pointer bug in previous fix for tar archive + uncompress.
- *
- */
--- a/ext/fileinfo/libmagic/softmagic.c
+++ b/ext/fileinfo/libmagic/softmagic.c
@ -32,7 +32,7 @@
 #include "file.h"

 #ifndef	lint
-FILE_RCSID("@(#)$File: softmagic.c,v 1.299 2020/06/07 21:58:01 christos Exp $")
+FILE_RCSID("@(#)$File: softmagic.c,v 1.309 2021/02/05 22:29:07 christos Exp $")
 #endif	/* lint */

 #include "magic.h"
@ -169,6 +169,8 @@ file_fmtcheck(struct magic_set *ms, const char *desc, const char *def,
 #define F(a, b, c) ((b))
 #endif

+/* NOTE: this function has been kept at the state of 5.39 for BC. Re-evaluate
+ * this when upgrading to 5.41 or above. */
 /*
 * Go through the whole list, stopping if you find a match.  Process all
 * the continuations of that match before returning.
@ -498,6 +500,28 @@ check_fmt(struct magic_set *ms, const char *fmt)
 	return rv;
 }

+#if !defined(HAVE_STRNDUP) || defined(__aiws__) || defined(_AIX)
+# if defined(__aiws__) || defined(_AIX)
+#  define strndup aix_strndup	/* aix is broken */
+# endif
+char *strndup(const char *, size_t);
+
+char *
+strndup(const char *str, size_t n)
+{
+	size_t len;
+	char *copy;
+
+	for (len = 0; len < n && str[len]; len++)
+		continue;
+	if ((copy = malloc(len + 1)) == NULL)
+		return NULL;
+	(void)memcpy(copy, str, len);
+	copy[len] = '\0';
+	return copy;
+}
+#endif /* HAVE_STRNDUP */
+
 static int
 varexpand(struct magic_set *ms, char *buf, size_t len, const char *str)
 {
@ -569,93 +593,58 @@ mprint(struct magic_set *ms, struct magic *m)
 	else
 		desc = ebuf;

+#define	PRINTER(value, format, stype, utype)	\
+	v = file_signextend(ms, m, CAST(uint64_t, value)); \
+	switch (check_fmt(ms, desc)) { \
+	case -1: \
+		return -1; \
+	case 1: \
+		if (m->flag & UNSIGNED) { \
+			(void)snprintf(buf, sizeof(buf), "%" format "u", \
+			    CAST(utype, v)); \
+		} else { \
+			(void)snprintf(buf, sizeof(buf), "%" format "d", \
+			    CAST(stype, v)); \
+		} \
+		if (file_printf(ms, F(ms, desc, "%s"), buf) == -1) \
+			return -1; \
+		break; \
+	default: \
+		if (m->flag & UNSIGNED) { \
+		       if (file_printf(ms, F(ms, desc, "%" format "u"), \
+			   CAST(utype, v)) == -1) \
+			   return -1; \
+		} else { \
+		       if (file_printf(ms, F(ms, desc, "%" format "d"), \
+			   CAST(stype, v)) == -1) \
+			   return -1; \
+		} \
+		break; \
+	} \
+	t = ms->offset + sizeof(stype); \
+	break
+
  	switch (m->type) {
  	case FILE_BYTE:
-		v = file_signextend(ms, m, CAST(uint64_t, p->b));
-		switch (check_fmt(ms, desc)) {
-		case -1:
-			return -1;
-		case 1:
-			(void)snprintf(buf, sizeof(buf), "%d",
-			    CAST(unsigned char, v));
-			if (file_printf(ms, F(ms, desc, "%s"), buf) == -1)
-				return -1;
-			break;
-		default:
-			if (file_printf(ms, F(ms, desc, "%d"),
-			    CAST(unsigned char, v)) == -1)
-				return -1;
-			break;
-		}
-		t = ms->offset + sizeof(char);
-		break;
+		PRINTER(p->b, "", int8_t, uint8_t);

  	case FILE_SHORT:
  	case FILE_BESHORT:
  	case FILE_LESHORT:
-		v = file_signextend(ms, m, CAST(uint64_t, p->h));
-		switch (check_fmt(ms, desc)) {
-		case -1:
-			return -1;
-		case 1:
-			(void)snprintf(buf, sizeof(buf), "%u",
-			    CAST(unsigned short, v));
-			if (file_printf(ms, F(ms, desc, "%s"), buf) == -1)
-				return -1;
-			break;
-		default:
-			if (file_printf(ms, F(ms, desc, "%u"),
-			    CAST(unsigned short, v)) == -1)
-				return -1;
-			break;
-		}
-		t = ms->offset + sizeof(short);
-		break;
+		PRINTER(p->h, "", int16_t, uint16_t);

  	case FILE_LONG:
  	case FILE_BELONG:
  	case FILE_LELONG:
  	case FILE_MELONG:
-		v = file_signextend(ms, m, CAST(uint64_t, p->l));
-		switch (check_fmt(ms, desc)) {
-		case -1:
-			return -1;
-		case 1:
-			(void)snprintf(buf, sizeof(buf), "%u",
-			    CAST(uint32_t, v));
-			if (file_printf(ms, F(ms, desc, "%s"), buf) == -1)
-				return -1;
-			break;
-		default:
-			if (file_printf(ms, F(ms, desc, "%u"),
-			    CAST(uint32_t, v)) == -1)
-				return -1;
-			break;
-		}
-		t = ms->offset + sizeof(int32_t);
+		PRINTER(p->l, "", int32_t, uint32_t);
  		break;

  	case FILE_QUAD:
  	case FILE_BEQUAD:
  	case FILE_LEQUAD:
 	case FILE_OFFSET:
-		v = file_signextend(ms, m, p->q);
-		switch (check_fmt(ms, desc)) {
-		case -1:
-			return -1;
-		case 1:
-			(void)snprintf(buf, sizeof(buf), "%" INT64_T_FORMAT "u",
-			    CAST(unsigned long long, v));
-			if (file_printf(ms, F(ms, desc, "%s"), buf) == -1)
-				return -1;
-			break;
-		default:
-			if (file_printf(ms, F(ms, desc, "%" INT64_T_FORMAT "u"),
-			    CAST(unsigned long long, v)) == -1)
-				return -1;
-			break;
-		}
-		t = ms->offset + sizeof(int64_t);
+		PRINTER(p->q, INT64_T_FORMAT, long long, unsigned long long);
  		break;

  	case FILE_STRING:
@ -678,19 +667,9 @@ mprint(struct magic_set *ms, struct magic *m)
 			if (*m->value.s == '\0')
 				str[strcspn(str, "\r\n")] = '\0';

-			if (m->str_flags & STRING_TRIM) {
-				char *last;
-				while (isspace(CAST(unsigned char, *str)))
-					str++;
-				last = str;
-				while (*last)
-					last++;
-				--last;
-				while (isspace(CAST(unsigned char, *last)))
-					last--;
-				*++last = '\0';
-			}
-
+			if (m->str_flags & STRING_TRIM)
+				str = file_strtrim(str);
+					
 			if (file_printf(ms, F(ms, desc, "%s"),
 			    file_printable(sbuf, sizeof(sbuf), str,
 				sizeof(p->s) - (str - p->s))) == -1)
@ -795,14 +774,20 @@ mprint(struct magic_set *ms, struct magic *m)

 	case FILE_SEARCH:
 	case FILE_REGEX: {
-		char *cp;
+		char *cp, *scp;
 		int rval;

-		cp = estrndup(RCAST(const char *, ms->search.s),
+		cp = strndup(RCAST(const char *, ms->search.s),
 		    ms->search.rm_len);
+		if (cp == NULL) {
+			file_oomem(ms, ms->search.rm_len);
+			return -1;
+		}
+		scp = (m->str_flags & STRING_TRIM) ? file_strtrim(cp) : cp;
+					
 		rval = file_printf(ms, F(ms, desc, "%s"),
-		    file_printable(sbuf, sizeof(sbuf), cp, ms->search.rm_len));
-		efree(cp);
+		    file_printable(sbuf, sizeof(sbuf), scp, ms->search.rm_len));
+		free(cp);

 		if (rval == -1)
 			return -1;
@ -955,6 +940,7 @@ moffset(struct magic_set *ms, struct magic *m, const struct buffer *b,
 	case FILE_DEFAULT:
 	case FILE_INDIRECT:
 	case FILE_OFFSET:
+	case FILE_USE:
 		o = ms->offset;
 		break;

@ -1541,6 +1527,28 @@ normal:
 	return 0;
 }

+private int
+save_cont(struct magic_set *ms, struct cont *c)
+{
+	size_t len;
+	*c = ms->c;
+	len = c->len * sizeof(*c->li);
+	ms->c.li = CAST(struct level_info *, malloc(len));
+	if (ms->c.li == NULL) {
+		ms->c = *c;
+		return -1;
+	}
+	memcpy(ms->c.li, c->li, len);
+	return 0;
+}
+
+private void
+restore_cont(struct magic_set *ms, struct cont *c)
+{
+	free(ms->c.li);
+	ms->c = *c;
+}
+
 private int
 mget(struct magic_set *ms, struct magic *m, const struct buffer *b,
    const unsigned char *s, size_t nbytes, size_t o, unsigned int cont_level,
@ -1548,14 +1556,15 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b,
    int *printed_something, int *need_separator, int *returnval,
    int *found_match)
 {
-	uint32_t offset = ms->offset;
+	uint32_t eoffset, offset = ms->offset;
 	struct buffer bb;
 	intmax_t lhs;
 	file_pushbuf_t *pb;
-	int rv, oneed_separator, in_type;
+	int rv, oneed_separator, in_type, nfound_match;
 	char *rbuf;
 	union VALUETYPE *p = &ms->ms_value;
 	struct mlist ml;
+	struct cont c;

 	if (*indir_count >= ms->indir_max) {
 		file_error(ms, 0, "indirect count (%hu) exceeded",
@ -1836,7 +1845,8 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b,

 		if (rv == 1) {
 			if ((ms->flags & MAGIC_NODESC) == 0 &&
-			    file_printf(ms, F(ms, m->desc, "%u"), offset) == -1) {
+			    file_printf(ms, F(ms, m->desc, "%u"), offset) == -1)
+			{
 				if (rbuf) efree(rbuf);
 				return -1;
 			}
@ -1860,16 +1870,32 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b,
 			file_error(ms, 0, "cannot find entry `%s'", rbuf);
 			return -1;
 		}
-		(*name_count)++;
+		if (save_cont(ms, &c) == -1) {
+			file_error(ms, errno, "can't allocate continuation");
+			return -1;
+		}
+
 		oneed_separator = *need_separator;
 		if (m->flag & NOSPACE)
 			*need_separator = 0;
+
+		nfound_match = 0;
+		(*name_count)++;
+		eoffset = ms->eoffset;
 		rv = match(ms, ml.magic, ml.nmagic, b, offset + o,
 		    mode, text, flip, indir_count, name_count,
-		    printed_something, need_separator, returnval, found_match);
+		    printed_something, need_separator, returnval,
+		    &nfound_match);
+		ms->ms_value.q = nfound_match;
 		(*name_count)--;
+		*found_match |= nfound_match;
+
+		restore_cont(ms, &c);
+
 		if (rv != 1)
 		    *need_separator = oneed_separator;
+		ms->offset = offset;
+		ms->eoffset = eoffset;
 		return rv;

 	case FILE_NAME:
@ -1934,13 +1960,10 @@ file_strncmp(const char *s1, const char *s2, size_t len, size_t maxlen,
 			}
 			else if ((flags & STRING_COMPACT_WHITESPACE) &&
 			    isspace(*a)) {
-				/* XXX Dirty. The data and the pattern is what is causing this.
-				       Revert _i for the next port and see if it still matters. */
-				uint32_t _i = 0;
 				a++;
 				if (isspace(*b++)) {
 					if (!isspace(*a))
-						while (EXPECTED(_i++ < 2048) && b < eb && isspace(*b))
+						while (b < eb && isspace(*b))
 							b++;
 				}
 				else {
@ -2282,9 +2305,10 @@ error_out:
 		}
 		break;
 	}
-	case FILE_INDIRECT:
 	case FILE_USE:
+		return ms->ms_value.q != 0;
 	case FILE_NAME:
+	case FILE_INDIRECT:
 		return 1;
 	case FILE_DER:
 		matched = der_cmp(ms, m);
--- a/ext/fileinfo/magicdata.patch
+++ b/ext/fileinfo/magicdata.patch
@ -1,99 +1,22 @@
-diff -u magic.orig/Magdir/images magic/Magdir/images
--- magic.orig/Magdir/images	2020-05-31 12:34:40.000000000 +0200
-+++ magic/Magdir/images	2020-07-05 20:00:41.664783368 +0200
-@@ -1,6 +1,6 @@
- 
+diff -ur Magdir.orig/mail.news Magdir/mail.news
+--- Magdir.orig/mail.news	2021-03-31 01:47:28.000000000 +0200
+++ Magdir/mail.news	2021-04-05 19:41:55.168556972 +0200
+@@ -1,5 +1,5 @@
 #------------------------------------------------------------------------------
-# $File: images,v 1.181 2020/05/30 23:49:03 christos Exp $
-+# $File: images,v 1.183 2020/06/26 17:08:32 christos Exp $
- # images:  file(1) magic for image formats (see also "iff", and "c-lang" for
- # XPM bitmaps)
+-# $File: mail.news,v 1.26 2021/03/21 14:37:03 christos Exp $
+# $File: mail.news,v 1.27 2021/04/05 16:36:14 christos Exp $
+ # mail.news:  file(1) magic for mail and news
 #
-@@ -32,22 +32,22 @@
- # Prevent conflicts with CRI ADX.
- >(2.S-2) belong	!0x28632943
- # skip more garbage like *.iso by looking for positive image type
->>2	ubyte			>0
-+>2	ubyte			>0
- # skip some compiled terminfo like xterm+tmux by looking for image type less equal 33
->>>2	ubyte			<34
-+>>2	ubyte			<34
- # skip arches.3200 , Finder.Root , Slp.1 by looking for low pixel depth 1 8 15 16 24 32
->>>>16	ubyte			1
->>>>>0		use		tga-image
->>>>16	ubyte			8
->>>>>0		use		tga-image
->>>>16	ubyte			15
->>>>>0		use		tga-image
->>>>16	ubyte			16
->>>>>0		use		tga-image
->>>>16	ubyte			24
->>>>>0		use		tga-image
->>>>16	ubyte			32
->>>>>0		use		tga-image
-+>>>16	ubyte			1
-+>>>>0		use		tga-image
-+>>>16	ubyte			8
-+>>>>0		use		tga-image
-+>>>16	ubyte			15
-+>>>>0		use		tga-image
-+>>>16	ubyte			16
-+>>>>0		use		tga-image
-+>>>16	ubyte			24
-+>>>>0		use		tga-image
-+>>>16	ubyte			32
-+>>>>0		use		tga-image
- #	display tga bitmap image information
- 0	name				tga-image
- >2	ubyte		<34		Targa image data
-@@ -615,7 +615,7 @@
- 0	leshort		40
- # skip bad samples like GAME by looking for valid number of color planes
- >12	uleshort	1		Device independent bitmap graphic
-!:mime	image/bmp
-+!:mime	image/x-ms-bmp
- !:apple	????BMPp
- !:ext	dib
- >>4	lelong		x		\b, %d x
-@@ -641,7 +641,7 @@
- >>18	leshort		x		\b, %d x
- >>20	leshort		x		%d
- >14	leshort		64		PC bitmap, OS/2 2.x format
-!:mime	image/bmp
-+!:mime	image/x-ms-bmp
- !:apple	????BMPp
- !:ext	bmp
- # image width and height fields are unsigned integers for OS/2
-@@ -662,7 +662,7 @@
- #>>(10.l) ubequad		!0	\b, bits 0x%16.16llx
- # BITMAPV2INFOHEADER	adds RGB bit masks
- >14	leshort		52		PC bitmap, Adobe Photoshop
-!:mime	image/bmp
-+!:mime	image/x-ms-bmp
- !:apple	????BMPp
- !:ext	bmp
- >>18	lelong		x		\b, %d x
-@@ -670,7 +670,7 @@
- >>28	leshort		x		%d
- # BITMAPV3INFOHEADER	adds alpha channel bit mask
- >14	leshort		56		PC bitmap, Adobe Photoshop with alpha channel mask
-!:mime	image/bmp
-+!:mime	image/x-ms-bmp
- !:apple	????BMPp
- !:ext	bmp
- >>18	lelong		x		\b, %d x
-@@ -679,7 +679,7 @@
- >14	leshort		40
- # jump 4 bytes before end of file/header to skip fmt-116-signature-id-118.dib
- >>(2.l-4)	ulong	x		PC bitmap, Windows 3.x format
-!:mime	image/bmp
-+!:mime	image/x-ms-bmp
- !:apple	????BMPp
- >>>18	lelong		x		\b, %d x
- >>>22	lelong		x		%d
-diff -u magic.orig/Magdir/rpm magic/Magdir/rpm
--- magic.orig/Magdir/rpm	2019-02-22 14:06:34.000000000 +0100
-+++ magic/Magdir/rpm	2020-07-05 19:38:02.720419674 +0200
+ # Unfortunately, saved netnews also has From line added in some news software.
+@@ -81,4 +81,4 @@
+ # File format spec: https://wiki.dovecot.org/Design/Dcrypt/#File_format
+ # From: Stephen Gildea
+ 0	string	CRYPTED\003\007		Dovecot encrypted message
+->9	byte	xu			\b, dcrypt version %d
+>9	byte	x			\b, dcrypt version %d
+diff -ur Magdir.orig/rpm Magdir/rpm
+--- Magdir.orig/rpm	2021-02-23 01:49:24.000000000 +0100
+++ Magdir/rpm	2021-04-05 19:40:55.080911893 +0200
@@ -29,6 +29,7 @@
 >>8	beshort		17		SuperH
 >>8	beshort		18		Xtensa
@ -102,9 +25,9 @@ diff -u magic.orig/Magdir/rpm magic/Magdir/rpm
 
 #delta RPM    Daniel Novotny (dnovotny@redhat.com)
 0	string		drpm		Delta RPM
-diff -u magic.orig/Magdir/securitycerts magic/Magdir/securitycerts
--- magic.orig/Magdir/securitycerts	2019-02-22 14:06:34.000000000 +0100
-+++ magic/Magdir/securitycerts	2020-07-05 19:38:02.720419674 +0200
+diff -ur Magdir.orig/securitycerts Magdir/securitycerts
+--- Magdir.orig/securitycerts	2021-02-23 01:49:24.000000000 +0100
+++ Magdir/securitycerts	2021-04-05 19:40:55.080911893 +0200
@@ -4,3 +4,5 @@
 0	search/1		-----BEGIN\ CERTIFICATE------	RFC1421 Security Certificate text
 0	search/1		-----BEGIN\ NEW\ CERTIFICATE	RFC1421 Security Certificate Signing Request text
--- a/ext/fileinfo/tests/bug68819_001.phpt
+++ b/ext/fileinfo/tests/bug68819_001.phpt
@ -15,4 +15,4 @@ $type = $finfo->buffer($string);
 var_dump($type);
 ?>
 --EXPECT--
-string(60) "ASCII text, with very long lines, with CRLF line terminators"
+string(66) "ASCII text, with very long lines (617), with CRLF line terminators"
--- a/ext/fileinfo/tests/bug68819_002.phpt
+++ b/ext/fileinfo/tests/bug68819_002.phpt
@ -23,4 +23,4 @@ var_dump($type);

 ?>
 --EXPECT--
-string(60) "ASCII text, with very long lines, with CRLF line terminators"
+string(67) "ASCII text, with very long lines (8191), with CRLF line terminators"
--- a/ext/fileinfo/tests/finfo_file_basic.phpt
+++ b/ext/fileinfo/tests/finfo_file_basic.phpt
@ -24,5 +24,5 @@ try {
 *** Testing finfo_file() : basic functionality ***
 string(28) "text/x-php; charset=us-ascii"
 string(22) "PHP script, ASCII text"
-string(25) "text/plain; charset=utf-8"
+string(28) "text/plain; charset=us-ascii"
 finfo_file(): Argument #1 ($finfo) must not contain any null bytes
--- a/ext/fileinfo/tests/magic
+++ b/ext/fileinfo/tests/magic
--- a/ext/fileinfo/tests/magic私はガラスを食べられます
+++ b/ext/fileinfo/tests/magic私はガラスを食べられます