busybox/testsuite/awk.tests
Denys Vlasenko 38335df9e9 awk: restore assignment precedence to be lower than ternary ?:
Something is fishy with constructs like "3==v=3" in gawk,
they should not work, but do. Ignore those for now.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
2024-07-09 15:30:46 +02:00

621 lines
24 KiB
Bash
Executable File

#!/bin/sh
# Copyright 2007 by Denys Vlasenko <vda.linux@googlemail.com>
# Licensed under GPLv2, see file LICENSE in this source tree.
. ./testing.sh
sq="'"
# testing "description" "command" "result" "infile" "stdin"
# Field-count checks with the bracket expression [#] as field separator.
# Every case runs the same program (print NF) and differs only in the
# expected output and in the text fed on stdin.
#   $1 - case number (becomes part of the test description)
#   $2 - expected output
#   $3 - stdin
nf_case() {
	testing "awk -F case $1" "awk -F '[#]' '{ print NF }'" "$2" "" "$3"
}
nf_case 0 "" ""
nf_case 1 "0\n" "\n"
nf_case 2 "2\n" "#\n"
nf_case 3 "3\n" "#abc#\n"
nf_case 4 "3\n" "#abc#zz\n"
nf_case 5 "4\n" "#abc##zz\n"
nf_case 6 "4\n" "z#abc##zz\n"
nf_case 7 "5\n" "z##abc##zz\n"
# conditions and operators
# Each test runs a BEGIN-only program. Tests whose condition is true expect
# the print to run; tests whose condition is false expect no output at all.
testing "awk if operator == " "awk 'BEGIN{if(23==23) print \"foo\"}'" "foo\n" "" ""
testing "awk if operator != " "awk 'BEGIN{if(23!=23) print \"bar\"}'" "" "" ""
testing "awk if operator >= " "awk 'BEGIN{if(23>=23) print \"foo\"}'" "foo\n" "" ""
testing "awk if operator < " "awk 'BEGIN{if(2 < 13) print \"foo\"}'" "foo\n" "" ""
testing "awk if string == " "awk 'BEGIN{if(\"a\"==\"ab\") print \"bar\"}'" "" "" ""
# 4294967295 = 0xffffffff
# OR-ing 1 into a value that already has bit 0 set must leave it unchanged.
testing "awk bitwise op" "awk '{ print or(4294967295,1) }'" "4294967295\n" "" "\n"
# Regression test: awk used to decide whether a function was defined by
# checking for a non-empty body, so calling a function with an empty body
# failed with:
# awk: cmd. line:8: Call to undefined function
# The argument expression contains ++i; the second print is expected to show
# i == 2, i.e. the argument is still evaluated.
prg='
function empty_fun(count) {
# empty
}
END {
i=1
print "L" i "\n"
empty_fun(i + i + ++i)
print "L" i "\n"
}'
testing "awk handles empty function f(arg){}" \
"awk '$prg'" \
"L1\n\nL2\n\n" \
"" ""
# Empty-body function lookup again, this time with no parameters and the
# whole definition on one line.
prg='
function empty_fun(){}
END {empty_fun()
print "Ok"
}'
testing "awk handles empty function f(){}" \
"awk '$prg'" \
"Ok\n" \
"" ""
# A function defined at top level is called from END and its return value
# is used on the right-hand side of +=; i is expected to go from 1 to 2.
prg='
function outer_fun() {
return 1
}
END {
i=1
print "L" i "\n"
i += outer_fun()
print "L" i "\n"
}'
testing "awk properly handles function from other scope" \
"awk '$prg'" \
"L1\n\nL2\n\n" \
"" ""
# Calling a function that is never defined must fail. stderr is merged into
# stdout so the message can be compared; the expected output has the first
# print before the error, and nothing after it.
prg='
END {
i=1
print "L" i "\n"
i + trigger_error_fun()
print "L" i "\n"
}'
testing "awk properly handles undefined function" \
"awk '$prg' 2>&1" \
"L1\n\nawk: cmd. line:5: Call to undefined function\n" \
"" ""
# A variable name separated from the parenthesis by a blank is a
# concatenation of v and a, so 12 is expected.
prg='
BEGIN {
v=1
a=2
print v (a)
}'
testing "awk 'v (a)' is not a function call, it is a concatenation" \
"awk '$prg' 2>&1" \
"12\n" \
"" ""
# f declares no parameters but is called with two arguments; both g calls
# must still run (G, G) before the body of f prints F.
prg='func f(){print"F"};func g(){print"G"};BEGIN{f(g(),g())}'
testing "awk unused function args are evaluated" \
"awk '$prg' 2>&1" \
"G\nG\nF\n" \
"" ""
# Hex and octal constants written in the program text. The three tests are
# bracketed by optional DESKTOP and SKIP= (presumably they are skipped when
# the DESKTOP config option is off; optional comes from testing.sh - verify).
optional DESKTOP
testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n"
testing "awk hex const 2" "awk '{ print or(0x80000000,1) }'" "2147483649\n" "" "\n"
testing "awk oct const" "awk '{ print or(01234,1) }'" "669\n" "" "\n"
SKIP=
# check that "hex/oct integer" heuristic doesn't kick in on input
# (must be done only when parsing program text)
# The field 011 read from stdin must count as decimal 11, so $1+1 is 12.
testing "awk input is never oct" "awk '{ print \$1, \$1+1 }'" "011 12\n" "" "011\n"
# check that "hex/oct integer" heuristic doesn't kick in on 00NN.NNN
testing "awk floating const with leading zeroes" \
"awk '{ printf \"%f %f\n\", \"000.123\", \"009.123\" }'" \
"0.123000 9.123000\n" \
"" "\n"
# long field seps requiring regex
# Every input record ends with the separator, so the last field is expected
# to be empty (length 0). The final record has no trailing newline.
testing "awk long field sep" \
"awk -F-- '{ print NF, length(\$NF), \$NF }'" \
"2 0 \n3 0 \n4 0 \n5 0 \n" \
"" \
"a--\na--b--\na--b--c--\na--b--c--d--"
# The separator is given as the escape sequence \x21, which is the
# exclamation mark that splits the input line.
testing "awk -F handles escapes" "awk -F'\\x21' '{print \$1}'" \
"a\n" \
"" \
"a!b\n"
# '@(samp|code|file)\{' is an invalid extended regex (unmatched '{'),
# but gawk 3.1.5 does not bail out on it.
# The echo appends the exit status of awk, which is expected to be 0.
testing "awk gsub falls back to non-extended-regex" \
"awk 'gsub(\"@(samp|code|file)\{\",\"\");'; echo \$?" "0\n" "" "Hi\n"
# Regression test driven by fixture files from awk_t1.tar.bz2. The archive
# is unpacked only when the test is not being skipped (SKIP is not 1), the
# awk output is compared through its md5sum, and the unpacked awk_t1_* files
# are removed afterwards (errors from rm are discarded).
optional TAR BUNZIP2 FEATURE_SEAMLESS_BZ2
test x"$SKIP" != x"1" && tar xjf awk_t1.tar.bz2
testing "awk 'gcc build bug'" \
"awk -f awk_t1_opt-functions.awk -f awk_t1_opth-gen.awk <awk_t1_input | md5sum" \
"f842e256461a5ab1ec60b58d16f1114f -\n" \
"" ""
rm -rf awk_t1_* 2>/dev/null
SKIP=
# Q expands to an awk string literal holding a colon; it is used as a
# visible separator. In BEGIN, NF is expected to be 0 and $0, $1, $2 empty.
Q='":"'
testing "awk NF in BEGIN" \
"awk 'BEGIN { print ${Q} NF ${Q} \$0 ${Q} \$1 ${Q} \$2 ${Q} }'" \
":0::::\n" \
"" ""
# Concatenating the numeric local with a string inside b must not turn the
# returned value into a string: c() is still expected to test as false, so
# the ternary selects number.
prg='
function b(tmp) {
tmp = 0;
print "" tmp; #this line causes the bug
return tmp;
}
function c(tmpc) {
tmpc = b(); return tmpc;
}
BEGIN {
print (c() ? "string" : "number");
}'
testing "awk string cast (bug 725)" \
"awk '$prg'" \
"0\nnumber\n" \
"" ""
# A blank between the array name and the opening bracket is allowed.
testing "awk handles whitespace before array subscript" \
"awk 'BEGIN { arr [3] = 1; print arr [3] }'" "1\n" "" ""
# GNU awk 3.1.5's "print ERRNO" prints "No such file or directory" instead of "2",
# do we need to emulate that as well?
# Expected here: the failed getline leaves ERRNO at 2, and close on the
# same name does not change ERRNO after it was reset to 0.
testing "awk handles non-existing file correctly" \
"awk 'BEGIN { getline line <\"doesnt_exist\"; print ERRNO; ERRNO=0; close(\"doesnt_exist\"); print ERRNO; print \"Ok\" }'" \
"2\n0\nOk\n" "" ""
# Nested for-in loops that share the loop variable l. The inner loop runs to
# completion every time, so the expected output shows l holding the last key
# of v after the inner loop, while the outer loop still visits a, b and c.
prg='
BEGIN {
u["a"]=1
u["b"]=1
u["c"]=1
v["d"]=1
v["e"]=1
v["f"]=1
for (l in u) {
print "outer1", l;
for (l in v) {
print " inner", l;
}
print "outer2", l;
}
print "end", l;
l="a"
exit;
}'
# The inner print emits the string " inner" (leading blank included), then
# OFS, then the key. Every inner line of the expected output therefore has
# to start with a blank; without it the comparison can never succeed.
testing "awk nested loops with the same variable" \
"awk '$prg'" \
"\
outer1 a
 inner d
 inner e
 inner f
outer2 f
outer1 b
 inner d
 inner e
 inner f
outer2 f
outer1 c
 inner d
 inner e
 inner f
outer2 f
end f
" \
"" ""
# Same nested for-in loops sharing the variable l, but the inner loop is
# left with break after its first iteration, so l is expected to keep the
# first key of v when control returns to the outer loop.
prg='
BEGIN {
u["a"]=1
u["b"]=1
u["c"]=1
v["d"]=1
v["e"]=1
v["f"]=1
for (l in u) {
print "outer1", l;
for (l in v) {
print " inner", l;
break;
}
print "outer2", l;
}
print "end", l;
l="a"
exit;
}'
# It's not just buggy, it enters infinite loop. Thus disabled
# (the leading false keeps the rest of the && chain from running).
# The inner print emits " inner" with a leading blank, so the expected inner
# lines start with a blank as well; keep both in sync when enabling the test.
false && test x"$SKIP_KNOWN_BUGS" = x"" && testing "awk nested loops with the same variable and break" \
"awk '$prg'" \
"\
outer1 a
 inner d
outer2 d
outer1 b
 inner d
outer2 d
outer1 c
 inner d
outer2 d
end d
" \
"" ""
# Variant of the shared-variable loops where the inner for-in loop lives in
# a function and is left with return after its first iteration. l is not a
# parameter of f, so the function and BEGIN use the same variable.
prg='
function f() {
for (l in v) {
print " inner", l;
return;
}
}
BEGIN {
u["a"]=1
u["b"]=1
u["c"]=1
v["d"]=1
v["e"]=1
v["f"]=1
for (l in u) {
print "outer1", l;
f();
print "outer2", l;
}
print "end", l;
l="a"
exit;
}'
# It's not just buggy, it enters infinite loop. Thus disabled
# (the leading false keeps the rest of the && chain from running).
# The inner print emits " inner" with a leading blank, so the expected inner
# lines start with a blank as well; keep both in sync when enabling the test.
false && test x"$SKIP_KNOWN_BUGS" = x"" && testing "awk nested loops with the same variable and return" \
"awk '$prg'" \
"\
outer1 a
 inner d
outer2 d
outer1 b
 inner d
outer2 d
outer1 c
 inner d
outer2 d
end d
" \
"" ""
# The subscript of delete has a side effect (cnt--). It must be evaluated
# exactly once: a[1] is removed, a[0] survives and cnt ends up at 0.
prg='
BEGIN{
cnt = 0
a[cnt] = "zeroth"
a[++cnt] = "first"
delete a[cnt--]
print cnt
print "[0]:" a[0]
print "[1]:" a[1]
}'
testing "awk 'delete a[v--]' evaluates v-- once" \
"awk '$prg'" \
"\
0
[0]:zeroth
[1]:
" \
"" ""
# Malformed parameter lists in a function definition (lone comma, doubled
# comma, trailing comma, missing comma) must all be rejected at line 1.
testing "awk func arg parsing 1" \
"awk 'func f(,) { }' 2>&1" "awk: cmd. line:1: Unexpected token\n" "" ""
testing "awk func arg parsing 2" \
"awk 'func f(a,,b) { }' 2>&1" "awk: cmd. line:1: Unexpected token\n" "" ""
testing "awk func arg parsing 3" \
"awk 'func f(a,) { }' 2>&1" "awk: cmd. line:1: Unexpected token\n" "" ""
testing "awk func arg parsing 4" \
"awk 'func f(a b) { }' 2>&1" "awk: cmd. line:1: Unexpected token\n" "" ""
# print followed by an empty pair of parentheses is an error.
testing "awk handles empty ()" \
"awk 'BEGIN {print()}' 2>&1" "awk: cmd. line:1: Empty sequence\n" "" ""
# FS is assigned while the first record is being processed. The expected
# output shows the first record still split on blanks ($1 is a:b) and the
# second record split on the colon ($1 is e).
testing "awk FS assignment" "awk '{FS=\":\"; print \$1}'" \
"a:b\ne\n" \
"" \
"a:b c:d\ne:f g:h"
# Values on both sides of 2^31 must print as plain integers, survive int()
# and have no fractional part. Guarded by optional FEATURE_AWK_LIBM
# (presumably because of the ^ operator - TODO confirm).
optional FEATURE_AWK_LIBM
testing "awk large integer" \
"awk 'BEGIN{n=(2^31)-1; print n, int(n), n%1, ++n, int(n), n%1}'" \
"2147483647 2147483647 0 2147483648 2147483648 0\n" \
"" ""
SKIP=
# length applied to an array yields the number of elements.
testing "awk length(array)" \
"awk 'BEGIN{ A[1]=2; A[\"qwe\"]=\"asd\"; print length(A)}'" \
"2\n" \
"" ""
# length without parentheses, with empty parentheses and with an argument.
# The input record is qwe; 99+9 is 108, whose string form has 3 characters.
testing "awk length()" \
"awk '{print length; print length(); print length(\"qwe\"); print length(99+9)}'" \
"3\n3\n3\n3\n" \
"" "qwe"
# Bare length followed by a comma: two print operands joined by OFS.
testing "awk print length, 1" \
"awk '{ print length, 1 }'" \
"0 1\n" \
"" "\n"
# Bare length followed directly by 1: a concatenation.
testing "awk print length 1" \
"awk '{ print length 1 }'" \
"01\n" \
"" "\n"
# Bare length is also usable in a pattern and inside an if condition.
testing "awk length == 0" \
"awk 'length == 0 { print \"foo\" }'" \
"foo\n" \
"" "\n"
testing "awk if (length == 0)" \
"awk '{ if (length == 0) { print \"bar\" } }'" \
"bar\n" \
"" "\n"
# The program is read from stdin (-f -) and the data from the file named
# input, which testing is given as its 4th argument. ARGC is expected to be
# 2, i.e. the -f option and its argument are not counted.
# The last argument line must NOT end in a backslash: a continuation there
# glues the following optional line onto this call, which hands testing
# seven arguments and prevents optional from running at all.
testing "awk -f and ARGC" \
"awk -f - input" \
"re\n2\n" \
"do re mi\n" \
'{print $2; print ARGC;}'
# Same check with the program passed through -e; guarded by
# optional FEATURE_AWK_GNU_EXTENSIONS and un-guarded again by SKIP=.
optional FEATURE_AWK_GNU_EXTENSIONS
testing "awk -e and ARGC" \
"awk -e '{print \$2; print ARGC;}' input" \
"re\n2\n" \
"do re mi\n" \
""
SKIP=
# break and continue outside of a loop must be reported as errors naming
# the statement, and awk must exit with status 1 (appended by the echo).
# The program is read from stdin through -f -, hence the - in the message.
testing "awk break" \
"awk -f - 2>&1; echo \$?" \
"awk: -:1: 'break' not in a loop\n1\n" \
"" \
'BEGIN { if (1) break; else a = 1 }'
testing "awk continue" \
"awk -f - 2>&1; echo \$?" \
"awk: -:1: 'continue' not in a loop\n1\n" \
"" \
'BEGIN { if (1) continue; else a = 1 }'
# The next three tests feed syntactically broken programs through -e and
# expect a parse error instead of a crash. Each one is bracketed by
# optional FEATURE_AWK_GNU_EXTENSIONS and SKIP=.
optional FEATURE_AWK_GNU_EXTENSIONS
testing "awk handles invalid for loop" \
"awk -e '{ for() }' 2>&1" "awk: cmd. line:1: Unexpected token\n" "" ""
SKIP=
optional FEATURE_AWK_GNU_EXTENSIONS
testing "awk handles colon not preceded by ternary" \
"awk -e foo:bar: 2>&1" "awk: cmd. line:1: Unexpected token\n" "" ""
SKIP=
optional FEATURE_AWK_GNU_EXTENSIONS
testing "awk errors on missing delete arg" \
"awk -e '{delete}' 2>&1" "awk: cmd. line:1: Too few arguments\n" "" ""
SKIP=
# printf %c with the argument 0 must emit a NUL byte and keep printing the
# rest of the format.
optional FEATURE_AWK_GNU_EXTENSIONS
testing "awk printf('%c') can output NUL" \
"awk '{printf(\"hello%c null\n\", 0)}'" "hello\0 null\n" "" "\n"
SKIP=
# Same with a field width. od -tx1 makes the bytes visible: 5b is the
# opening bracket, 00 the NUL, nine 20 bytes the padding, 5d the closing
# bracket and 0a the newline (13 bytes, offset 015 in octal).
optional FEATURE_AWK_GNU_EXTENSIONS DESKTOP
testing "awk printf('%-10c') can output NUL" \
"awk 'BEGIN { printf \"[%-10c]\n\", 0 }' | od -tx1" "\
0000000 5b 00 20 20 20 20 20 20 20 20 20 5d 0a
0000015
" "" ""
SKIP=
# testing "description" "command" "result" "infile" "stdin"
# Referencing a field with a negative index must produce an error message.
testing 'awk negative field access' \
'awk 2>&1 -- '\''{ $(-1) }'\' \
"awk: cmd. line:1: Access to negative field\n" \
'' \
'anything'
# was misinterpreted as (("str"++) i) instead of ("str" (++i))
# (and was executed: "str"++ is "0", thus concatenating "0" and "1"):
testing 'awk do not allow "str"++' \
'awk -v i=1 "BEGIN {print \"str\" ++i}"' \
"str2\n" \
'' \
'anything'
# gawk compat: FS regex matches only non-empty separators:
# with -*, the splitting is NOT f o o b a r, but foo bar:
testing 'awk FS regex which can match empty string' \
"awk -F '-*' '{print \$1 \"-\" \$2 \"=\" \$3 \"*\" \$4}'" \
"foo-bar=*\n" \
'' \
'foo--bar'
# last+1 field should be empty (had a bug where it wasn't)
# The input ends with a separator, so the field after it is the last one.
testing 'awk $NF is empty' \
"awk -F '=+' '{print \$NF}'" \
"\n" \
'' \
'a=====123='
# exit without an argument in END must keep the status set by the earlier
# exit 42 in BEGIN; the echo prints the resulting status.
testing "awk exit N propagates through END's exit" \
"awk 'BEGIN { exit 42 } END { exit }'; echo \$?" \
"42\n" \
'' ''
# Output redirected to /dev/stderr; the %s in the printed text is expected
# to come out literally.
testing "awk print + redirect" \
"awk 'BEGIN { print \"STDERR %s\" >\"/dev/stderr\" }' 2>&1" \
"STDERR %s\n" \
'' ''
# getline fed from a command pipe, with no variable: the following bare
# print is expected to output the line that was read.
testing "awk \"cmd\" | getline" \
"awk 'BEGIN { \"echo HELLO\" | getline; print }'" \
"HELLO\n" \
'' ''
# printf %% should print one % (had a bug where it didn't)
testing 'awk printf %% prints one %' \
"awk 'BEGIN { printf \"%%\n\" }'" \
"%\n" \
'' ''
# Inside the double-quoted shell word, the two backslashes yield a single
# backslash that is immediately followed by a real newline. awk has to drop
# that pair without leaving anything behind. The blank between Hello and
# world in the expected output comes from the blank that starts the
# continuation line, so that leading blank is significant.
testing 'awk backslash+newline eaten with no trace' \
"awk 'BEGIN { printf \"Hello\\
 world\n\" }'" \
"Hello world\n" \
'' ''
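# User-supplied example that used to crash (SEGV) because of a use-after-realloc.
# The expected output repeats fragments of each input line on purpose: the
# program assigns the whole record into one of its own fields.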
testing 'awk assign while assign' \
"awk '\$5=\$\$5=\$0'; echo \$?" \
"\
─ process timing ────────────────────────────────────┬─ ─ process timing ────────────────────────────────────┬─ overall results ────┐ results ────┐
│ run time : │ run time : 0 days, 0 hrs, 0 min, 56 sec │ cycles done : 0 │ days, 0 hrs, 0 min, 56 sec │ cycles done : 0 │
│ last new find │ last new find : 0 days, 0 hrs, 0 min, 1 sec │ corpus count : 208 │ 0 days, 0 hrs, 0 min, 1 sec │ corpus count : 208 │
│last saved crash : │last saved crash : none seen yet │saved crashes : 0 │ seen yet │saved crashes : 0 │
│ last saved hang │ last saved hang : none seen yet │ saved hangs : 0 │ none seen yet │ saved hangs : 0 │
├─ cycle progress ─────────────────────┬─ ├─ cycle progress ─────────────────────┬─ map coverage┴──────────────────────┤ coverage┴──────────────────────┤
│ now processing : │ now processing : 184.1 (88.5%) │ map density : 0.30% / 0.52% │ (88.5%) │ map density : 0.30% / 0.52% │ │ now processing : 184.1 (88.5%) │ map density : 0.30% / 0.52% │
│ runs timed out │ runs timed out : 0 (0.00%) │ count coverage : 2.18 bits/tuple │ 0 (0.00%) │ count coverage : 2.18 bits/tuple │
├─ stage progress ─────────────────────┼─ ├─ stage progress ─────────────────────┼─ findings in depth ─────────────────┤ in depth ─────────────────┤
│ now trying : │ now trying : havoc │ favored items : 43 (20.67%) │ │ favored items : 43 (20.67%) │
│ stage execs : │ stage execs : 11.2k/131k (8.51%) │ new edges on : 52 (25.00%) │ (8.51%) │ new edges on │ stage execs : 11.2k/131k (8.51%) │ new edges on : 52 (25.00%) │ 52 (25.00%) │
│ total execs : │ total execs : 179k │ total crashes : 0 (0 saved) │ │ total crashes : 0 (0 saved) │ │ total execs : 179k │ total crashes : 0 (0 saved) │
│ exec speed : │ exec speed : 3143/sec │ total tmouts : 0 (0 saved) │ │ total tmouts : 0 (0 saved) │ │ exec speed : 3143/sec │ total tmouts : 0 (0 saved) │
├─ fuzzing strategy yields ├─ fuzzing strategy yields ────────────┴─────────────┬─ item geometry ───────┤ item geometry ───────┤
│ bit flips : │ bit flips : 11/648, 4/638, 5/618 │ levels : 4 │ 4/638, 5/618 │ levels : │ bit flips : 11/648, 4/638, 5/618 │ levels : 4 │ │
│ byte flips : │ byte flips : 0/81, 0/71, 0/52 │ pending : 199 │ 0/71, 0/52 │ pending : 199 │
│ arithmetics : 11/4494, │ arithmetics : 11/4494, 0/1153, 0/0 │ pend fav : 35 │ 0/0 │ pend fav : 35 │
│ known ints : 1/448, 0/1986, 0/2288 │ own finds : 207 │ known ints : │ known ints : 1/448, 0/1986, 0/2288 │ own finds : 207 │ 0/1986, 0/2288 │ own finds : 207 │
│ dictionary : 0/0, │ dictionary : 0/0, 0/0, 0/0, 0/0 │ imported : 0 │ 0/0, 0/0 │ imported : 0 │
│havoc/splice : 142/146k, 23/7616 │havoc/splice : 142/146k, 23/7616 │ stability : 100.00% │ stability : 100.00% │
│py/custom/rq : unused, unused, │py/custom/rq : unused, unused, unused, unused ├───────────────────────┘ unused ├───────────────────────┘
│ trim/eff : 57.02%/26, │ trim/eff : 57.02%/26, 0.00% │ [cpu000:100%] │ [cpu000:100%]
└────────────────────────────────────────────────────┘^C └────────────────────────────────────────────────────┘^C
0
" \
"" \
"\
─ process timing ────────────────────────────────────┬─ overall results ────┐
│ run time : 0 days, 0 hrs, 0 min, 56 sec │ cycles done : 0 │
│ last new find : 0 days, 0 hrs, 0 min, 1 sec │ corpus count : 208 │
│last saved crash : none seen yet │saved crashes : 0 │
│ last saved hang : none seen yet │ saved hangs : 0 │
├─ cycle progress ─────────────────────┬─ map coverage┴──────────────────────┤
│ now processing : 184.1 (88.5%) │ map density : 0.30% / 0.52% │
│ runs timed out : 0 (0.00%) │ count coverage : 2.18 bits/tuple │
├─ stage progress ─────────────────────┼─ findings in depth ─────────────────┤
│ now trying : havoc │ favored items : 43 (20.67%) │
│ stage execs : 11.2k/131k (8.51%) │ new edges on : 52 (25.00%) │
│ total execs : 179k │ total crashes : 0 (0 saved) │
│ exec speed : 3143/sec │ total tmouts : 0 (0 saved) │
├─ fuzzing strategy yields ────────────┴─────────────┬─ item geometry ───────┤
│ bit flips : 11/648, 4/638, 5/618 │ levels : 4 │
│ byte flips : 0/81, 0/71, 0/52 │ pending : 199 │
│ arithmetics : 11/4494, 0/1153, 0/0 │ pend fav : 35 │
│ known ints : 1/448, 0/1986, 0/2288 │ own finds : 207 │
│ dictionary : 0/0, 0/0, 0/0, 0/0 │ imported : 0 │
│havoc/splice : 142/146k, 23/7616 │ stability : 100.00% │
│py/custom/rq : unused, unused, unused, unused ├───────────────────────┘
│ trim/eff : 57.02%/26, 0.00% │ [cpu000:100%]
└────────────────────────────────────────────────────┘^C"
# If field separator FS=' ' (default), fields are split only on
# space or tab or linefeed, NOT other whitespace.
# $sq holds a single quote and is used to wrap the awk program. Assigning
# $0 to $1 rebuilds the record; the carriage return is expected to stay
# attached to word3 both times.
testing 'awk does not split on CR (char 13)' \
'awk '$sq'{ $1=$0; print }'$sq \
'word1 word2 word3\r word2 word3\r\n' \
'' 'word1 word2 word3\r'
# The two tests below are disabled: gawk accepts such constructs, but that
# seems to be a bug in the gawk parser.
#testing "awk = has higher precedence than == (despite what gawk manpage claims)" \
# "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \
# '0\n1\n2\n1\n3\n' \
# '' ''
#
#testing 'awk assign while test' \
# 'awk '$sq'$1==$1="foo" {print $1}'$sq \
# "foo\n" \
# "" \
# "foo"
# Assignment binds more loosely than ?: here, so the whole conditional
# expression is evaluated first and its result (ok) is stored in a.
testing "awk = and ?: precedence" \
'awk '$sq'BEGIN { a=0?"bug":"ok"; print a}'$sq \
'ok\n' \
'' ''
# TODO: gawk can do this: awk 'BEGIN { u=v=w=1; print u=0?v=4:w=5; print u,v,w}'
# and even this: u=0?v=4?5:6:w=7?8:9
# Backslash handling in the replacement string of gensub. Every test stores
# the replacement in s, prints it, and then prints the result of replacing
# each a in the text a|a. The test names show the replacement as awk sees it.
testing 'awk gensub backslashes \' \
'awk '$sq'BEGIN { s="\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
's=\\
\\|\\
' '' ''
testing 'awk gensub backslashes \\' \
'awk '$sq'BEGIN { s="\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
's=\\\\
\\|\\
' '' ''
# gawk 5.1.1 handles trailing unpaired \ inconsistently.
# If replace string is single \, it is used verbatim,
# but if it is \\\ (three slashes), gawk uses "\<NUL>" (!!!), not "\\" as you would expect.
testing 'awk gensub backslashes \\\' \
'awk '$sq'BEGIN { s="\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
's=\\\\\\
\\\\|\\\\
' '' ''
testing 'awk gensub backslashes \\\\' \
'awk '$sq'BEGIN { s="\\\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
's=\\\\\\\\
\\\\|\\\\
' '' ''
# An escaped ampersand is expected to produce a literal ampersand.
testing 'awk gensub backslashes \&' \
'awk '$sq'BEGIN { s="\\&"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
's=\\&
&|&
' '' ''
# Backslash followed by 0 is expected to insert the matched text, which
# leaves the input unchanged.
testing 'awk gensub backslashes \0' \
'awk '$sq'BEGIN { s="\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
's=\\0
a|a
' '' ''
# With the backslash itself escaped, the 0 is ordinary text.
testing 'awk gensub backslashes \\0' \
'awk '$sq'BEGIN { s="\\\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
's=\\\\0
\\0|\\0
' '' ''
# References to empty (not provided in the input) fields in first versus subsequent lines
# Neither input line has a second field; the pattern must be true for both,
# so both lines are expected in the output.
testing 'awk references to empty fields' \
'awk '$sq'$2 != 0'$sq \
'a
b
' '' 'a\nb\n'
# The "b" in "abc" should not match <b* pattern.
# Currently we use REG_STARTEND ("This flag is a BSD extension, not present in POSIX")
# to implement the code to handle this correctly, but if your libc has no REG_STARTEND,
# the alternative code mishandles this case.
testing 'awk gsub erroneous word start match' \
"awk 'BEGIN { a=\"abc\"; gsub(/\<b*/,\"\",a); print a }'" \
'abc\n' \
'' ''
# FAILCOUNT is not set anywhere in this file (presumably maintained by
# testing.sh); its value becomes the exit status of the script.
exit $FAILCOUNT