tests/tcg: add vectorised sha512 versions

This builds vectorised versions of sha512 to exercise the vector code:

  - aarch64 (AdvSimd)
  - i386 (SSE)
  - s390x (MVX)
  - ppc64/ppc64le (power10 vectors)

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225172021.3493923-14-alex.bennee@linaro.org>
This commit is contained in:
Alex Bennée 2022-02-25 17:20:16 +00:00
parent d426f4fc6f
commit f8a4c6d728
7 changed files with 56 additions and 2 deletions

View File

@ -60,6 +60,13 @@ run-sha1-vector: sha1-vector run-sha1
TESTS += sha1-vector
# Vector versions of sha512 (-O3 triggers vectorisation)
sha512-vector: CFLAGS=-O3
sha512-vector: sha512.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
TESTS += sha512-vector
ifneq ($(HAVE_GDB_BIN),)
GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py

View File

@ -79,6 +79,14 @@ run-sha1-vector: sha1-vector run-sha1
$(call diff-out, sha1-vector, sha1.out)
ARM_TESTS += sha1-vector
# Vector versions of sha512 (-O3 triggers vectorisation)
sha512-vector: CFLAGS=-O3
sha512-vector: sha512.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
ARM_TESTS += sha512-vector
TESTS += $(ARM_TESTS)
# On ARM Linux only supports 4k pages

View File

@ -71,3 +71,12 @@ TESTS=$(MULTIARCH_TESTS) $(I386_TESTS)
# On i386 and x86_64 Linux only supports 4k pages (large pages are a different hack)
EXTRA_RUNS+=run-test-mmap-4096
sha512-sse: CFLAGS=-msse4.1 -O3
sha512-sse: sha512.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
run-sha512-sse: QEMU_OPTS+=-cpu max
run-plugin-sha512-sse-with-%: QEMU_OPTS+=-cpu max
TESTS+=sha512-sse

View File

@ -13,12 +13,19 @@ $(PPC64_TESTS): CFLAGS += -mpower8-vector
PPC64_TESTS += mtfsf
ifneq ($(DOCKER_IMAGE)$(CROSS_CC_HAS_POWER10),)
PPC64_TESTS += byte_reverse
PPC64_TESTS += byte_reverse sha512-vector
endif
byte_reverse: CFLAGS += -mcpu=power10
run-byte_reverse: QEMU_OPTS+=-cpu POWER10
run-plugin-byte_reverse-with-%: QEMU_OPTS+=-cpu POWER10
sha512-vector: CFLAGS +=-mcpu=power10 -O3
sha512-vector: sha512.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
run-sha512-vector: QEMU_OPTS+=-cpu POWER10
run-plugin-sha512-vector-with-%: QEMU_OPTS+=-cpu POWER10
PPC64_TESTS += signal_save_restore_xer
TESTS += $(PPC64_TESTS)

View File

@ -10,12 +10,19 @@ endif
$(PPC64LE_TESTS): CFLAGS += -mpower8-vector
ifneq ($(DOCKER_IMAGE)$(CROSS_CC_HAS_POWER10),)
PPC64LE_TESTS += byte_reverse
PPC64LE_TESTS += byte_reverse sha512-vector
endif
byte_reverse: CFLAGS += -mcpu=power10
run-byte_reverse: QEMU_OPTS+=-cpu POWER10
run-plugin-byte_reverse-with-%: QEMU_OPTS+=-cpu POWER10
sha512-vector: CFLAGS +=-mcpu=power10 -O3
sha512-vector: sha512.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
run-sha512-vector: QEMU_OPTS+=-cpu POWER10
run-plugin-sha512-vector-with-%: QEMU_OPTS+=-cpu POWER10
PPC64LE_TESTS += mtfsf
PPC64LE_TESTS += signal_save_restore_xer

View File

@ -25,3 +25,12 @@ run-gdbstub-signals-s390x: signals-s390x
EXTRA_RUNS += run-gdbstub-signals-s390x
endif
# MVX versions of sha512
sha512-mvx: CFLAGS=-march=z13 -mvx -O3
sha512-mvx: sha512.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
run-sha512-mvx: QEMU_OPTS+=-cpu max
TESTS+=sha512-mvx

View File

@ -22,3 +22,10 @@ test-x86_64: test-i386.c test-i386.h test-i386-shift.h test-i386-muldiv.h
vsyscall: $(SRC_PATH)/tests/tcg/x86_64/vsyscall.c
$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
# TCG does not yet support all SSE (SIGILL on pshufb)
# sha512-sse: CFLAGS=-march=core2 -O3
# sha512-sse: sha512.c
# $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
TESTS+=sha512-sse