2019-06-29 07:35:32 +08:00
|
|
|
#!/bin/bash

# Stop at the first failing command and trace every command into the job log.
set -o errexit -o xtrace

# Options handed to every dEQP invocation: surface size and type, GL config
# and window visibility.
DEQP_OPTIONS=(
  --deqp-surface-width=256
  --deqp-surface-height=256
  --deqp-surface-type=pbuffer
  --deqp-gl-config-name=rgba8888d24s8ms0
  --deqp-visibility=hidden
)

# The watchdog stays off for now. Enabling it would keep a hung test from
# waiting out the full hour until the run times out, but some shaders take
# long enough to compile
# (dEQP-GLES31.functional.ubo.random.all_per_block_buffers.20 for example)
# that they sporadically trigger it.
#DEQP_OPTIONS+=(--deqp-watchdog=enable)
|
|
|
|
|
2019-06-29 07:35:32 +08:00
|
|
|
# Validate the job configuration supplied through the environment. Each check
# prints a hint about the expected value and aborts the job when the variable
# is missing.

# DEQP_VER selects the must-pass list and the dEQP binary to run.
if [ -z "$DEQP_VER" ]; then
  echo 'DEQP_VER must be set to something like "gles2", "gles31" or "vk" for the test run'
  exit 1
fi

# Vulkan runs also need VK_DRIVER, which is used to build the ICD file name
# exported in VK_ICD_FILENAMES.
if [ "$DEQP_VER" = "vk" ]; then
  if [ -z "$VK_DRIVER" ]; then
    echo 'VK_DRIVER must be set to something like "radeon" or "intel" for the test run'
    exit 1
  fi
fi

# DEQP_SKIPS names the exclude list (looked up under the artifacts directory).
if [ -z "$DEQP_SKIPS" ]; then
  echo 'DEQP_SKIPS must be set to something like "deqp-default-skips.txt"'
  exit 1
fi
|
|
|
|
|
ci: Use cts_runner for our dEQP runs.
This runner is a little project by Bas, written in C++, that spawns
threads that then loop grabbing chunks of the (randomly shuffled but
consistently so) test list and hand it to a dEQP instance. As the
remaining list gets shorter, so do the chunks, so hopefully the
threads all complete effectively at once. It also handles restarting
after crashes automatically. I've extended the runner a bit to do
what I was doing in the bash scripts before, like the skip list and
expected failures handling. This project should also be a good
baseline for extending to handle retesting of intermittent failures.
By switching to it, we can have the swrast tests just take up one job
slot on the shared runners and keep their allotment of CPUs busy,
instead of taking up job slots with single-threaded dEQP jobs. It
will also let us (eventually, once I reprovision) switch the freedreno
runners over to threading within the job instead of running concurrent
jobs, so that memory scribbles in one pipeline don't affect unrelated
pipelines, and I can experiment with their parallelism (particularly
on a306 where we are frequently backed up) without trashing other
people's jobs.
What we lose in this process is per-test output in the log (not a big
loss, I think, since we summarize fails at the end and reducing log
length keeps chrome from choking on our logs so badly). We also drop
the renderer sanity checking, since it's not saving qpa files for us
to go poke through. Given that all the drivers involved have fail
lists, if we got the wrong renderer somehow, we'd get a job failure
anyway.
v2: Rebase on dropping of the autoscale cluster and the arm64
build/test split. Use a script to deduplicate the cts-runner
build.
v3: Rebase on the amd64 build/test container split.
Acked-by: Daniel Stone <daniels@collabora.com> (v1)
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com> (v2)
2019-11-05 02:54:41 +08:00
|
|
|
# Directory the skip list and expected-failures list are read from.
ARTIFACTS="$(pwd)/artifacts"

# Set up the driver environment.
export LD_LIBRARY_PATH="$(pwd)/install/lib/"
export EGL_PLATFORM=surfaceless
# VK_DRIVER is only validated for Vulkan runs; for other runs this path is
# built from an empty driver name.
export VK_ICD_FILENAMES="$(pwd)/install/share/vulkan/icd.d/${VK_DRIVER}_icd.x86_64.json"

# the runner was failing to look for libkms in /usr/local/lib for some reason
# I never figured out.
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib"

# Directory that collects everything the job produces. Quoted so that a
# working directory containing whitespace still yields a single directory.
RESULTS="$(pwd)/results"
mkdir -p "$RESULTS"
|
|
|
|
|
2019-11-19 15:39:00 +08:00
|
|
|
# Generate test case list file.
# The must-pass list for the selected dEQP version is copied to
# /tmp/case-list.txt and DEQP is pointed at the matching test binary.
if [ "$DEQP_VER" = "vk" ]; then
  cp /deqp/mustpass/vk-master.txt /tmp/case-list.txt
  DEQP=/deqp/external/vulkancts/modules/vulkan/deqp-vk
else
  cp "/deqp/mustpass/$DEQP_VER-master.txt" /tmp/case-list.txt
  DEQP="/deqp/modules/$DEQP_VER/deqp-$DEQP_VER"
fi

# If the job is parallel, take the corresponding fraction of the caselist.
# Note: N~M is a gnu sed extension to match every nth line (first line is #1).
if [ -n "$CI_NODE_INDEX" ]; then
  sed -ni "${CI_NODE_INDEX}~${CI_NODE_TOTAL}p" /tmp/case-list.txt
fi

# An empty or missing list means one of the steps above went wrong.
if [ ! -s /tmp/case-list.txt ]; then
  echo "Caselist generation failed"
  exit 1
fi
|
|
|
|
|
ci: Use cts_runner for our dEQP runs.
This runner is a little project by Bas, written in C++, that spawns
threads that then loop grabbing chunks of the (randomly shuffled but
consistently so) test list and hand it to a dEQP instance. As the
remaining list gets shorter, so do the chunks, so hopefully the
threads all complete effectively at once. It also handles restarting
after crashes automatically. I've extended the runner a bit to do
what I was doing in the bash scripts before, like the skip list and
expected failures handling. This project should also be a good
baseline for extending to handle retesting of intermittent failures.
By switching to it, we can have the swrast tests just take up one job
slot on the shared runners and keep their allotment of CPUs busy,
instead of taking up job slots with single-threaded dEQP jobs. It
will also let us (eventually, once I reprovision) switch the freedreno
runners over to threading within the job instead of running concurrent
jobs, so that memory scribbles in one pipeline don't affect unrelated
pipelines, and I can experiment with their parallelism (particularly
on a306 where we are frequently backed up) without trashing other
people's jobs.
What we lose in this process is per-test output in the log (not a big
loss, I think, since we summarize fails at the end and reducing log
length keeps chrome from choking on our logs so badly). We also drop
the renderer sanity checking, since it's not saving qpa files for us
to go poke through. Given that all the drivers involved have fail
lists, if we got the wrong renderer somehow, we'd get a job failure
anyway.
v2: Rebase on dropping of the autoscale cluster and the arm64
build/test split. Use a script to deduplicate the cts-runner
build.
v3: Rebase on the amd64 build/test container split.
Acked-by: Daniel Stone <daniels@collabora.com> (v1)
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com> (v2)
2019-11-05 02:54:41 +08:00
|
|
|
# Optional expected-failures list for the runner. XFAIL is kept as one string
# ("--xfail-list <path>") that is expanded unquoted where the runner is
# invoked, so it splits into the flag and its argument there.
# Start from an empty value so an XFAIL inherited from the environment cannot
# leak into the runner command line when no list is configured.
XFAIL=""
if [ -n "$DEQP_EXPECTED_FAILS" ]; then
  XFAIL="--xfail-list $ARTIFACTS/$DEQP_EXPECTED_FAILS"
fi
|
|
|
|
|
ci: Use cts_runner for our dEQP runs.
This runner is a little project by Bas, written in C++, that spawns
threads that then loop grabbing chunks of the (randomly shuffled but
consistently so) test list and hand it to a dEQP instance. As the
remaining list gets shorter, so do the chunks, so hopefully the
threads all complete effectively at once. It also handles restarting
after crashes automatically. I've extended the runner a bit to do
what I was doing in the bash scripts before, like the skip list and
expected failures handling. This project should also be a good
baseline for extending to handle retesting of intermittent failures.
By switching to it, we can have the swrast tests just take up one job
slot on the shared runners and keep their allotment of CPUs busy,
instead of taking up job slots with single-threaded dEQP jobs. It
will also let us (eventually, once I reprovision) switch the freedreno
runners over to threading within the job instead of running concurrent
jobs, so that memory scribbles in one pipeline don't affect unrelated
pipelines, and I can experiment with their parallelism (particularly
on a306 where we are frequently backed up) without trashing other
people's jobs.
What we lose in this process is per-test output in the log (not a big
loss, I think, since we summarize fails at the end and reducing log
length keeps chrome from choking on our logs so badly). We also drop
the renderer sanity checking, since it's not saving qpa files for us
to go poke through. Given that all the drivers involved have fail
lists, if we got the wrong renderer somehow, we'd get a job failure
anyway.
v2: Rebase on dropping of the autoscale cluster and the arm64
build/test split. Use a script to deduplicate the cts-runner
build.
v3: Rebase on the amd64 build/test container split.
Acked-by: Daniel Stone <daniels@collabora.com> (v1)
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com> (v2)
2019-11-05 02:54:41 +08:00
|
|
|
# From here on a failing command no longer aborts the script: the runner's
# exit status is captured and examined explicitly further down.
set +o errexit
|
|
|
|
|
2019-11-18 03:33:01 +08:00
|
|
|
#######################################
# Run a dEQP binary over a case list through deqp-runner.
# Globals:
#   ARTIFACTS, DEQP_SKIPS (read) - together form the --exclude-list path
#   XFAIL (read)                 - optional "--xfail-list <path>" string
#   DEQP_PARALLEL (read)         - value for --job, defaults to 1 when unset
#   DEQP_OPTIONS (read)          - array appended after the "--" separator
# Arguments:
#   $1 - value for --deqp (the dEQP binary)
#   $2 - value for --caselist
#   $3 - value for --output
# Returns:
#   The exit status of deqp-runner.
#######################################
run_cts() {
  local deqp=$1
  local caselist=$2
  local output=$3

  # XFAIL is intentionally left unquoted: it is either empty (and must vanish
  # from the command line) or holds a flag plus its argument as one string.
  # shellcheck disable=SC2086
  deqp-runner \
    --deqp "$deqp" \
    --output "$output" \
    --caselist "$caselist" \
    --exclude-list "$ARTIFACTS/$DEQP_SKIPS" \
    $XFAIL \
    --job "${DEQP_PARALLEL:-1}" \
    --allow-flakes true \
    -- \
    "${DEQP_OPTIONS[@]}"
}
|
|
|
|
|
|
|
|
#######################################
# Post the flaky tests of this job to an IRC channel.
# Does nothing (and succeeds) when FLAKES_CHANNEL is empty or unset.
# Globals (all read):
#   FLAKES_CHANNEL - channel to join and post to
#   CI_RUNNER_DESCRIPTION, CI_PIPELINE_ID - combined into the IRC nick
#   CI_JOB_URL - linked in the announcement and the closing message
#   CI_MERGE_REQUEST_SOURCE_BRANCH_NAME, CI_MERGE_REQUEST_TITLE - added to
#     the announcement only when the branch name is non-empty
# Arguments:
#   $1 - file with one flake per line
# Returns:
#   0 when not configured, otherwise the exit status of nc.
#######################################
report_flakes() {
  if [ -z "$FLAKES_CHANNEL" ]; then
    return 0
  fi

  local flakes=$1
  local bot="$CI_RUNNER_DESCRIPTION-$CI_PIPELINE_ID"
  local channel="$FLAKES_CHANNEL"
  local desc flake

  # The subshell writes a raw IRC session that is piped into nc.
  (
    echo "NICK $bot"
    echo "USER $bot unused unused :Gitlab CI Notifier"
    # NOTE(review): presumably gives the server time to finish registration
    # before joining - confirm.
    sleep 10
    echo "JOIN $channel"
    sleep 1

    desc="Flakes detected in job: $CI_JOB_URL on $CI_RUNNER_DESCRIPTION"
    # Only mention the branch for merge request pipelines. The variable has
    # to be expanded here; testing its bare name is always true.
    if [ -n "$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME" ]; then
      desc="$desc on branch $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME ($CI_MERGE_REQUEST_TITLE)"
    fi
    echo "PRIVMSG $channel :$desc"

    # One message per flake; blank lines are skipped.
    while IFS= read -r flake || [ -n "$flake" ]; do
      [ -n "$flake" ] || continue
      echo "PRIVMSG $channel :$flake"
    done < "$flakes"

    echo "PRIVMSG $channel :See $CI_JOB_URL/artifacts/browse/results/"
    echo "QUIT"
  ) | nc irc.freenode.net 6667 > /dev/null
}
|
|
|
|
|
2019-11-18 03:57:26 +08:00
|
|
|
#######################################
# Extract the log of one test case from a set of qpa files and convert it to
# XML.
# The first qpa file containing a "#beginTestCaseResult <testcase>" line
# wins. That section is copied verbatim (whitespace and special characters
# preserved) into "<testcase>.qpa" in the current directory, wrapped in
# #beginSession/#endSession, and passed to /deqp/executor/testlog-to-xml.
# Globals:
#   RESULTS (read) - directory receiving <testcase>.xml and the stylesheets
# Arguments:
#   $1  - test case name
#   $2+ - qpa files to search
# Returns:
#   0 once the section was extracted, 1 when the section is truncated (no
#   "#endTestCaseResult" before end of file) or the test case is in none of
#   the files.
#######################################
extract_xml_result() {
  local testcase=$1
  shift 1
  local start="#beginTestCaseResult $testcase"
  local dst="$testcase.qpa"
  local qpa line

  for qpa in "$@"; do
    while IFS= read -r line; do
      [ "$line" = "$start" ] || continue

      printf '%s\n' "#beginSession" "$line" > "$dst"
      # Keep reading from the same file to copy the body of the section.
      while IFS= read -r line; do
        printf '%s\n' "$line" >> "$dst"
        if [ "$line" = "#endTestCaseResult" ]; then
          printf '%s\n' "#endSession" >> "$dst"
          /deqp/executor/testlog-to-xml "$dst" "$RESULTS/$testcase.xml"
          # copy the stylesheets here so they only end up in artifacts
          # if we have one or more result xml in artifacts
          cp /deqp/testlog.{css,xsl} "$RESULTS/"
          return 0
        fi
      done
      # End of file reached inside the section: the log is truncated.
      return 1
    done < "$qpa"
  done

  # The test case does not appear in any of the given files.
  return 1
}
|
|
|
|
|
|
|
|
#######################################
# Run extract_xml_result for every result line read from stdin.
# Stdin:
#   Lines of the form "<testcase>,<result>"; everything from the last comma
#   on is stripped to obtain the test case name.
# Arguments:
#   $@ - qpa files to search, forwarded unchanged for every test case
# Returns:
#   The status of the last extract_xml_result call (0 for empty input).
#######################################
extract_xml_results() {
  local testcase

  while IFS= read -r testcase; do
    testcase=${testcase%,*}
    # "$@" keeps each qpa path as one argument, even with whitespace in it.
    extract_xml_result "$testcase" "$@"
  done
}
|
|
|
|
|
2019-11-18 04:04:50 +08:00
|
|
|
#######################################
# Generate junit results
# Writes a junit XML document with a single test suite to stdout.
# Globals (all read):
#   DEQP_VER, CI_NODE_INDEX - combined into the test suite name
#   CI_JOB_URL - used for the link printed inside each failure element
# Arguments:
#   $1 - results file with one "<testcase>,<result>" line per test
# Outputs:
#   One <testcase> element per line whose result is not "Skip"; results
#   other than "Pass" additionally get a <failure> element.
#######################################
generate_junit() {
  local results=$1
  local line testcase result

  echo "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
  echo "<testsuites>"
  printf '%s\n' "<testsuite name=\"$DEQP_VER-$CI_NODE_INDEX\">"
  # "|| [ -n ... ]" also processes a final line without a trailing newline.
  while read -r line || [ -n "$line" ]; do
    testcase=${line%,*}
    result=${line#*,}
    # avoid counting Skip's in the # of tests:
    if [ "$result" = "Skip" ]; then
      continue
    fi
    printf '%s\n' "<testcase name=\"$testcase\">"
    if [ "$result" != "Pass" ]; then
      printf '%s\n' "<failure type=\"$result\">"
      printf '%s\n' "$result: See $CI_JOB_URL/artifacts/results/$testcase.xml"
      echo "</failure>"
    fi
    echo "</testcase>"
  done < "$results"
  echo "</testsuite>"
  echo "</testsuites>"
}
|
|
|
|
|
2019-11-18 03:33:01 +08:00
|
|
|
#######################################
# wrapper to suppress +x to avoid spamming the log
# Runs the given command with xtrace switched off. Afterwards xtrace is
# switched back on only if it was on when the wrapper was entered.
# Arguments:
#   $@ - command (or shell function) to run, with its arguments
# Returns:
#   The exit status of the wrapped command.
#######################################
quiet() {
  # Prefixed names: the wrapped command may be a shell function, which would
  # see (and could clobber) plainly named locals of this wrapper.
  local _quiet_xtrace=0 _quiet_status
  case $- in
    *x*) _quiet_xtrace=1 ;;
  esac

  set +x
  "$@"
  _quiet_status=$?

  if [ "$_quiet_xtrace" -eq 1 ]; then
    set -x
  fi
  return "$_quiet_status"
}
|
|
|
|
|
2019-11-19 15:39:00 +08:00
|
|
|
# Run the selected dEQP binary over the generated case list. The statement
# that follows must read $? directly, so nothing may run in between.
run_cts "$DEQP" /tmp/case-list.txt "$RESULTS/cts-runner-results.txt"
|
ci: Use cts_runner for our dEQP runs.
This runner is a little project by Bas, written in C++, that spawns
threads that then loop grabbing chunks of the (randomly shuffled but
consistently so) test list and hand it to a dEQP instance. As the
remaining list gets shorter, so do the chunks, so hopefully the
threads all complete effectively at once. It also handles restarting
after crashes automatically. I've extended the runner a bit to do
what I was doing in the bash scripts before, like the skip list and
expected failures handling. This project should also be a good
baseline for extending to handle retesting of intermittent failures.
By switching to it, we can have the swrast tests just take up one job
slot on the shared runners and keep their allotment of CPUs busy,
instead of taking up job slots with single-threaded dEQP jobs. It
will also let us (eventually, once I reprovision) switch the freedreno
runners over to threading within the job instead of running concurrent
jobs, so that memory scribbles in one pipeline don't affect unrelated
pipelines, and I can experiment with their parallelism (particularly
on a306 where we are frequently backed up) without trashing other
people's jobs.
What we lose in this process is per-test output in the log (not a big
loss, I think, since we summarize fails at the end and reducing log
length keeps chrome from choking on our logs so badly). We also drop
the renderer sanity checking, since it's not saving qpa files for us
to go poke through. Given that all the drivers involved have fail
lists, if we got the wrong renderer somehow, we'd get a job failure
anyway.
v2: Rebase on dropping of the autoscale cluster and the arm64
build/test split. Use a script to deduplicate the cts-runner
build.
v3: Rebase on the amd64 build/test container split.
Acked-by: Daniel Stone <daniels@collabora.com> (v1)
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com> (v2)
2019-11-05 02:54:41 +08:00
|
|
|
# Remember the status of the preceding command (the run_cts call) before
# anything else overwrites $?; it becomes the exit status of the script.
DEQP_EXITCODE=$?

# Convert the runner results into a junit report in the results directory.
quiet generate_junit "$RESULTS/cts-runner-results.txt" > "$RESULTS/results.xml"
|
|
|
|
|
2019-06-29 07:35:32 +08:00
|
|
|
# Post-process the run: on failure collect and show the unexpected results,
# on success look for flakes. Either way the script exits with the status
# captured in DEQP_EXITCODE.
if [ "$DEQP_EXITCODE" -ne 0 ]; then
  # preserve caselist files in case of failures:
  cp /tmp/cts_runner.*.txt "$RESULTS/"

  echo "Some unexpected results found (see cts-runner-results.txt in artifacts for full results):"
  # Everything that is neither a pass, a skip nor an expected failure.
  grep -v -e ",Pass" -e ",Skip" -e ",ExpectedFail" \
    "$RESULTS/cts-runner-results.txt" \
    > "$RESULTS/cts-runner-unexpected-results.txt"
  head -n 50 "$RESULTS/cts-runner-unexpected-results.txt"

  # Save the logs for up to the first 50 unexpected results:
  head -n 50 "$RESULTS/cts-runner-unexpected-results.txt" \
    | quiet extract_xml_results /tmp/*.qpa
else
  grep ",Flake" "$RESULTS/cts-runner-results.txt" \
    > "$RESULTS/cts-runner-flakes.txt"

  # grep only writes whole lines, so a non-empty file means at least one flake.
  if [ -s "$RESULTS/cts-runner-flakes.txt" ]; then
    echo "Some flakes found (see cts-runner-flakes.txt in artifacts for full results):"
    head -n 50 "$RESULTS/cts-runner-flakes.txt"

    # Save the logs for up to the first 50 flakes:
    head -n 50 "$RESULTS/cts-runner-flakes.txt" \
      | quiet extract_xml_results /tmp/*.qpa

    # Report the flakes to IRC channel for monitoring (if configured):
    quiet report_flakes "$RESULTS/cts-runner-flakes.txt"
  else
    # no flakes, so clean-up:
    rm "$RESULTS/cts-runner-flakes.txt"
  fi
fi

exit "$DEQP_EXITCODE"
|