From 855f18ea0e6ff83ec9ed5af74e82bc4bff30867d Mon Sep 17 00:00:00 2001 From: Wolfgang Denk Date: Sat, 23 Mar 2013 23:50:34 +0000 Subject: [PATCH] setexpr: add regex substring matching and substitution Add "setexpr name gsub r s [t]" and "setexpr name sub r s [t]" commands which implement substring matching for the regular expression in the string , and substitution of the string . The result is assigned to the environment variable . If is not supplied, the previous value of is used instead. "gsub" performs global substitution, while "sub" will replace only the first substring. Both commands are closely modeled after the gawk functions with the same names. Examples: - Generate broadcast address by substituting the last two numbers of the IP address by "255.255": => print ipaddr ipaddr=192.168.1.104 => setexpr broadcast sub "(.*\\.).*\\..*" "\\1255.255" $ipaddr broadcast=192.168.255.255 - Depending on keyboard configuration (German vs. US keyboard) a barcode scanner may initialize the MAC address as C0:E5:4E:02:06:DC or as C0>E5>4E>02>06>DC. Make sure we always have a correct value: => print ethaddr ethaddr=C0>E5>4E>02>06>DC => setexpr ethaddr gsub > : ethaddr=C0:E5:4E:02:06:DC - Do the same, but substitute one step at a time in a loop until no futher matches: => setenv ethaddr C0>E5>4E>02>06>DC => while setexpr ethaddr sub > : > do > echo ----- > done ethaddr=C0:E5>4E>02>06>DC ----- ethaddr=C0:E5:4E>02>06>DC ----- ethaddr=C0:E5:4E:02>06>DC ----- ethaddr=C0:E5:4E:02:06>DC ----- ethaddr=C0:E5:4E:02:06:DC ----- C0:E5:4E:02:06:DC: No match => print ethaddr ethaddr=C0:E5:4E:02:06:DC etc. To enable this feature, the CONFIG_REGEX option has to be defined in the board config file. Signed-off-by: Wolfgang Denk --- common/cmd_setexpr.c | 280 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 277 insertions(+), 3 deletions(-) diff --git a/common/cmd_setexpr.c b/common/cmd_setexpr.c index ccd87f4bd83..93cb255b225 100644 --- a/common/cmd_setexpr.c +++ b/common/cmd_setexpr.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Freescale Semiconductor, Inc. + * Copyright 2013 Wolfgang Denk * * See file CREDITS for list of people who contributed to this * project. @@ -50,13 +51,263 @@ static ulong get_arg(char *s, int w) } } +#ifdef CONFIG_REGEX + +#include + +#define SLRE_BUFSZ 16384 +#define SLRE_PATSZ 4096 + +/* + * memstr - Find the first substring in memory + * @s1: The string to be searched + * @s2: The string to search for + * + * Similar to and based on strstr(), + * but strings do not need to be NUL terminated. + */ +static char *memstr(const char *s1, int l1, const char *s2, int l2) +{ + if (!l2) + return (char *)s1; + + while (l1 >= l2) { + l1--; + if (!memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} + +static char *substitute(char *string, /* string buffer */ + int *slen, /* current string length */ + int ssize, /* string bufer size */ + const char *old,/* old (replaced) string */ + int olen, /* length of old string */ + const char *new,/* new (replacement) string */ + int nlen) /* length of new string */ +{ + char *p = memstr(string, *slen, old, olen); + + if (p == NULL) + return NULL; + + debug("## Match at pos %ld: match len %d, subst len %d\n", + (long)(p - string), olen, nlen); + + /* make sure replacement matches */ + if (*slen + nlen - olen > ssize) { + printf("## error: substitution buffer overflow\n"); + return NULL; + } + + /* move tail if needed */ + if (olen != nlen) { + int tail, len; + + len = (olen > nlen) ? olen : nlen; + + tail = ssize - (p + len - string); + + debug("## tail len %d\n", tail); + + memmove(p + nlen, p + olen, tail); + } + + /* insert substitue */ + memcpy(p, new, nlen); + + *slen += nlen - olen; + + return p + nlen; +} + +/* + * Perform regex operations on a environment variable + * + * Returns 0 if OK, 1 in case of errors. + */ +static int regex_sub(const char *name, + const char *r, const char *s, const char *t, + int global) +{ + struct slre slre; + char data[SLRE_BUFSZ]; + char *datap = data; + const char *value; + int res, len, nlen, loop; + + if (name == NULL) + return 1; + + if (slre_compile(&slre, r) == 0) { + printf("Error compiling regex: %s\n", slre.err_str); + return 1; + } + + if (t == NULL) { + value = getenv(name); + + if (value == NULL) { + printf("## Error: variable \"%s\" not defined\n", name); + return 1; + } + t = value; + } + + debug("REGEX on %s=%s\n", name, t); + debug("REGEX=\"%s\", SUBST=\"%s\", GLOBAL=%d\n", + r, s ? s : "", global); + + len = strlen(t); + if (len + 1 > SLRE_BUFSZ) { + printf("## error: subst buffer overflow: have %d, need %d\n", + SLRE_BUFSZ, len + 1); + return 1; + } + + strcpy(data, t); + + if (s == NULL) + nlen = 0; + else + nlen = strlen(s); + + for (loop = 0;; loop++) { + struct cap caps[slre.num_caps + 2]; + char nbuf[SLRE_PATSZ]; + const char *old; + char *np; + int i, olen; + + (void) memset(caps, 0, sizeof(caps)); + + res = slre_match(&slre, datap, len, caps); + + debug("Result: %d\n", res); + + for (i = 0; i < slre.num_caps; i++) { + if (caps[i].len > 0) { + debug("Substring %d: [%.*s]\n", i, + caps[i].len, caps[i].ptr); + } + } + + if (res == 0) { + if (loop == 0) { + printf("%s: No match\n", t); + return 1; + } else { + break; + } + } + + debug("## MATCH ## %s\n", data); + + if (s == NULL) { + printf("%s=%s\n", name, t); + return 1; + } + + old = caps[0].ptr; + olen = caps[0].len; + + if (nlen + 1 >= SLRE_PATSZ) { + printf("## error: pattern buffer overflow: have %d, need %d\n", + SLRE_BUFSZ, nlen + 1); + return 1; + } + strcpy(nbuf, s); + + debug("## SUBST(1) ## %s\n", nbuf); + + /* + * Handle back references + * + * Support for \0 ... \9, where \0 is the + * whole matched pattern (similar to &). + * + * Implementation is a bit simpleminded as + * backrefs are substituted sequentially, one + * by one. This will lead to somewhat + * unexpected results if the replacement + * strings contain any \N strings then then + * may get substitued, too. We accept this + * restriction for the sake of simplicity. + */ + for (i = 0; i < 10; ++i) { + char backref[2] = { + '\\', + '0', + }; + + if (caps[i].len == 0) + break; + + backref[1] += i; + + debug("## BACKREF %d: replace \"%.*s\" by \"%.*s\" in \"%s\"\n", + i, + 2, backref, + caps[i].len, caps[i].ptr, + nbuf); + + for (np = nbuf;;) { + char *p = memstr(np, nlen, backref, 2); + + if (p == NULL) + break; + + np = substitute(np, &nlen, + SLRE_PATSZ, + backref, 2, + caps[i].ptr, caps[i].len); + + if (np == NULL) + return 1; + } + } + debug("## SUBST(2) ## %s\n", nbuf); + + datap = substitute(datap, &len, SLRE_BUFSZ, + old, olen, + nbuf, nlen); + + if (datap == NULL) + return 1; + + debug("## REMAINDER: %s\n", datap); + + debug("## RESULT: %s\n", data); + + if (!global) + break; + } + debug("## FINAL (now setenv()) : %s\n", data); + + printf("%s=%s\n", name, data); + + return setenv(name, data); +} +#endif + static int do_setexpr(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[]) { ulong a, b; ulong value; int w; - if (argc < 3) + /* + * We take 3, 5, or 6 arguments: + * 3 : setexpr name value + * 5 : setexpr name val1 op val2 + * setexpr name [g]sub r s + * 6 : setexpr name [g]sub r s t + */ + + /* > 6 already tested by max command args */ + if ((argc < 3) || (argc == 4)) return CMD_RET_USAGE; w = cmd_get_data_size(argv[0], 4); @@ -69,6 +320,19 @@ static int do_setexpr(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[]) return 0; } + /* 5 or 6 args (6 args only with [g]sub) */ +#ifdef CONFIG_REGEX + /* + * rexep handling: "setexpr name [g]sub r s [t]" + * with 5 args, "t" will be NULL + */ + if (strcmp(argv[2], "gsub") == 0) + return regex_sub(argv[1], argv[3], argv[4], argv[5], 1); + + if (strcmp(argv[2], "sub") == 0) + return regex_sub(argv[1], argv[3], argv[4], argv[5], 0); +#endif + /* standard operators: "setexpr name val1 op val2" */ if (argc != 5) return CMD_RET_USAGE; @@ -114,13 +378,23 @@ static int do_setexpr(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[]) } U_BOOT_CMD( - setexpr, 5, 0, do_setexpr, + setexpr, 6, 0, do_setexpr, "set environment variable as the result of eval expression", "[.b, .w, .l] name [*]value1 [*]value2\n" " - set environment variable 'name' to the result of the evaluated\n" - " express specified by . can be &, |, ^, +, -, *, /, %\n" + " expression specified by . can be &, |, ^, +, -, *, /, %\n" " size argument is only meaningful if value1 and/or value2 are\n" " memory addresses (*)\n" "setexpr[.b, .w, .l] name [*]value\n" " - load a value into a variable" +#ifdef CONFIG_REGEX + "\n" + "setexpr name gsub r s [t]\n" + " - For each substring matching the regular expression in the\n" + " string , substitute the string . The result is\n" + " assigned to . If is not supplied, use the old\n" + " value of \n" + "setexpr name sub r s [t]\n" + " - Just like gsub(), but replace only the first matching substring" +#endif );