mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-11-24 11:24:05 +08:00
79e3f7d54b
When gcc is configured for nvptx offloading with --without-cuda-driver and full CUDA isn't installed, many libgomp.oacc-*/* tests fail, some of them because cuda.h header can't be found, others because the tests can't be linked against -lcuda, -lcudart or -lcublas. I usually only have akmod-nvidia and xorg-x11-drv-nvidia-cuda rpms installed, so libcuda.so.1 can be dlopened and the offloading works, but linking against those libraries isn't possible nor are the headers around (for the plugin itself there is the fallback libgomp/plugin/cuda/cuda.h). The following patch adds 3 new effective targets and uses them in tests that needs those. 2021-05-27 Jakub Jelinek <jakub@redhat.com> * testsuite/lib/libgomp.exp (check_effective_target_openacc_cuda, check_effective_target_openacc_cublas, check_effective_target_openacc_cudart): New. * testsuite/libgomp.oacc-fortran/host_data-4.f90: Require effective target openacc_cublas. * testsuite/libgomp.oacc-fortran/host_data-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/host_data-3.f: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-91.c: Require effective target openacc_cuda. * testsuite/libgomp.oacc-c-c++-common/lib-70.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-90.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-75.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-69.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-74.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-81.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-72.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-85.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/pr87835.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-82.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-73.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-83.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-78.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-76.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-84.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/host_data-1.c: Require effective targets openacc_cublas and openacc_cudart. * testsuite/libgomp.oacc-c-c++-common/context-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/context-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/context-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/context-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_get_property-nvptx.c: Require effective target openacc_cudart. * testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c: Add -DUSE_CUDA_H for effective target openacc_cuda and add && defined USE_CUDA_H to preprocessor conditionals. Guard -lcuda also on openacc_cuda effective target.
103 lines
2.2 KiB
Fortran
103 lines
2.2 KiB
Fortran
! Test host_data interoperability with CUDA blas using modules.
|
|
|
|
! { dg-do run { target openacc_nvidia_accel_selected } }
|
|
! { dg-additional-options "-lcublas -Wall -Wextra" }
|
|
! { dg-require-effective-target openacc_cublas }
|
|
|
|
module cublas
|
|
interface
|
|
subroutine cublassaxpy(N, alpha, x, incx, y, incy) bind(c, name="cublasSaxpy")
|
|
use iso_c_binding
|
|
integer(kind=c_int), value :: N
|
|
real(kind=c_float), value :: alpha
|
|
type(*), dimension(*) :: x
|
|
integer(kind=c_int), value :: incx
|
|
type(*), dimension(*) :: y
|
|
integer(kind=c_int), value :: incy
|
|
end subroutine cublassaxpy
|
|
end interface
|
|
|
|
contains
|
|
subroutine saxpy (nn, aa, xx, yy)
|
|
integer :: nn
|
|
real*4 :: aa, xx(nn), yy(nn)
|
|
integer i
|
|
!$acc routine
|
|
|
|
do i = 1, nn
|
|
yy(i) = yy(i) + aa * xx(i)
|
|
end do
|
|
end subroutine saxpy
|
|
|
|
subroutine validate_results (n, a, b)
|
|
integer :: n
|
|
real*4 :: a(n), b(n)
|
|
|
|
do i = 1, N
|
|
if (abs(a(i) - b(i)) > 0.0001) stop 1
|
|
end do
|
|
end subroutine validate_results
|
|
end module cublas
|
|
|
|
program test
|
|
use cublas
|
|
implicit none
|
|
|
|
integer, parameter :: N = 10
|
|
integer :: i
|
|
real*4 :: x_ref(N), y_ref(N), x(N), y(N), a
|
|
|
|
a = 2.0
|
|
|
|
do i = 1, N
|
|
x(i) = 4.0 * i
|
|
y(i) = 3.0
|
|
x_ref(i) = x(i)
|
|
y_ref(i) = y(i)
|
|
end do
|
|
|
|
call saxpy (N, a, x_ref, y_ref)
|
|
|
|
!$acc data copyin (x) copy (y)
|
|
!$acc host_data use_device (x, y)
|
|
call cublassaxpy(N, a, x, 1, y, 1)
|
|
!$acc end host_data
|
|
!$acc end data
|
|
|
|
call validate_results (N, y, y_ref)
|
|
|
|
!$acc data create (x) copyout (y)
|
|
!$acc parallel loop
|
|
do i = 1, N
|
|
y(i) = 3.0
|
|
end do
|
|
!$acc end parallel loop
|
|
|
|
!$acc host_data use_device (x, y)
|
|
call cublassaxpy(N, a, x, 1, y, 1)
|
|
!$acc end host_data
|
|
!$acc end data
|
|
|
|
call validate_results (N, y, y_ref)
|
|
|
|
y(:) = 3.0
|
|
|
|
!$acc data copyin (x) copyin (a) copy (y)
|
|
!$acc parallel present (x) pcopy (y) present (a)
|
|
call saxpy (N, a, x, y)
|
|
!$acc end parallel
|
|
!$acc end data
|
|
|
|
call validate_results (N, y, y_ref)
|
|
|
|
y(:) = 3.0
|
|
|
|
!$acc enter data copyin (x, a, y)
|
|
!$acc parallel present (x) pcopy (y) present (a)
|
|
call saxpy (N, a, x, y)
|
|
!$acc end parallel
|
|
!$acc exit data delete (x, a) copyout (y)
|
|
|
|
call validate_results (N, y, y_ref)
|
|
end program test
|