mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-11-27 05:44:15 +08:00
libgomp: Add new runtime routines omp_target_memcpy_async and omp_target_memcpy_rect_async
This patch adds two new OpenMP runtime routines: omp_target_memcpy_async and omp_target_memcpy_rect_async. Both functions are introduced in OpenMP 5.1 as asynchronous variants of omp_target_memcpy and omp_target_memcpy_rect. In contrast to the synchronous variants, the asynchronous functions have two additional function parameters to allow the specification of task dependences: int depobj_count omp_depend_t *depobj_list integer(c_int), value :: depobj_count integer(omp_depend_kind), optional :: depobj_list(*) The implementation splits the synchronous functions into two parts: (a) check and (b) copy. Then (a) is used in the asynchronous functions for the sequential part, and the actual copy process (b) is executed in a new created task. The sequential part (a) takes into account the requirements for the return values: "The routine returns zero if successful. Otherwise, it returns a non-zero value." (omp_target_memcpy_async, OpenMP 5.1 spec, section 3.8.7) "An application can determine the number of inclusive dimensions supported by an implementation by passing NULL pointers (or C_NULL_PTR, for Fortran) for both dst and src. The routine returns the number of dimensions supported by the implementation for the specified device numbers. No copy operation is performed." (omp_target_memcpy_rect_async, OpenMP 5.1 spec, section 3.8.8) Due to asynchronicity an error is thrown if the asynchronous memcpy is not successful (in contrast to the synchronous functions which use a return value unequal to zero). gcc/ChangeLog: * omp-low.cc (omp_runtime_api_call): Added target_memcpy_async and target_memcpy_rect_async to omp_runtime_apis array. libgomp/ChangeLog: * libgomp.map: Added omp_target_memcpy_async and omp_target_memcpy_rect_async. * libgomp.texi: Both functions are now supported. * omp.h.in: Added omp_target_memcpy_async and omp_target_memcpy_rect_async. * omp_lib.f90.in: Added interfaces for both new functions. * omp_lib.h.in: Likewise. * target.c (ialias_redirect): Added for GOMP_task. (omp_target_memcpy): Restructured into check and copy part. (omp_target_memcpy_check): New helper function for omp_target_memcpy and omp_target_memcpy_async that checks requirements. (omp_target_memcpy_copy): New helper function for omp_target_memcpy and omp_target_memcpy_async that performs the memcpy. (omp_target_memcpy_async_helper): New helper function that is used in omp_target_memcpy_async for the asynchronous task. (omp_target_memcpy_async): Added. (omp_target_memcpy_rect): Restructured into check and copy part. (omp_target_memcpy_rect_check): New helper function for omp_target_memcpy_rect and omp_target_memcpy_rect_async that checks requirements. (omp_target_memcpy_rect_copy): New helper function for omp_target_memcpy_rect and omp_target_memcpy_rect_async that performs the memcpy. (omp_target_memcpy_rect_async_helper): New helper function that is used in omp_target_memcpy_rect_async for the asynchronous task. (omp_target_memcpy_rect_async): Added. * task.c (ialias): Added for GOMP_task. * testsuite/libgomp.c-c++-common/target-memcpy-async-1.c: New test. * testsuite/libgomp.c-c++-common/target-memcpy-async-2.c: New test. * testsuite/libgomp.c-c++-common/target-memcpy-rect-async-1.c: New test. * testsuite/libgomp.c-c++-common/target-memcpy-rect-async-2.c: New test. * testsuite/libgomp.fortran/target-memcpy-async-1.f90: New test. * testsuite/libgomp.fortran/target-memcpy-async-2.f90: New test. * testsuite/libgomp.fortran/target-memcpy-rect-async-1.f90: New test. * testsuite/libgomp.fortran/target-memcpy-rect-async-2.f90: New test.
This commit is contained in:
parent
5143faee0d
commit
6c420193e8
@ -4011,7 +4011,9 @@ omp_runtime_api_call (const_tree fndecl)
|
||||
"target_is_accessible",
|
||||
"target_is_present",
|
||||
"target_memcpy",
|
||||
"target_memcpy_async",
|
||||
"target_memcpy_rect",
|
||||
"target_memcpy_rect_async",
|
||||
NULL,
|
||||
/* Now omp_* calls that are available as omp_* and omp_*_; however, the
|
||||
DECL_NAME is always omp_* without tailing underscore. */
|
||||
|
@ -230,6 +230,8 @@ OMP_5.1.1 {
|
||||
global:
|
||||
omp_get_mapped_ptr;
|
||||
omp_target_is_accessible;
|
||||
omp_target_memcpy_async;
|
||||
omp_target_memcpy_rect_async;
|
||||
} OMP_5.1;
|
||||
|
||||
GOMP_1.0 {
|
||||
|
@ -315,7 +315,7 @@ The OpenMP 4.5 specification is fully supported.
|
||||
routines @tab Y @tab
|
||||
@item @code{omp_target_is_accessible} runtime routine @tab Y @tab
|
||||
@item @code{omp_target_memcpy_async} and @code{omp_target_memcpy_rect_async}
|
||||
runtime routines @tab N @tab
|
||||
runtime routines @tab Y @tab
|
||||
@item @code{omp_get_mapped_ptr} runtime routine @tab Y @tab
|
||||
@item @code{omp_calloc}, @code{omp_realloc}, @code{omp_aligned_alloc} and
|
||||
@code{omp_aligned_calloc} runtime routines @tab Y @tab
|
||||
|
@ -272,6 +272,10 @@ extern int omp_target_is_present (const void *, int) __GOMP_NOTHROW;
|
||||
extern int omp_target_memcpy (void *, const void *, __SIZE_TYPE__,
|
||||
__SIZE_TYPE__, __SIZE_TYPE__, int, int)
|
||||
__GOMP_NOTHROW;
|
||||
extern int omp_target_memcpy_async (void *, const void *, __SIZE_TYPE__,
|
||||
__SIZE_TYPE__, __SIZE_TYPE__, int, int,
|
||||
int, omp_depend_t *)
|
||||
__GOMP_NOTHROW;
|
||||
extern int omp_target_memcpy_rect (void *, const void *, __SIZE_TYPE__, int,
|
||||
const __SIZE_TYPE__ *,
|
||||
const __SIZE_TYPE__ *,
|
||||
@ -279,6 +283,14 @@ extern int omp_target_memcpy_rect (void *, const void *, __SIZE_TYPE__, int,
|
||||
const __SIZE_TYPE__ *,
|
||||
const __SIZE_TYPE__ *, int, int)
|
||||
__GOMP_NOTHROW;
|
||||
extern int omp_target_memcpy_rect_async (void *, const void *, __SIZE_TYPE__,
|
||||
int, const __SIZE_TYPE__ *,
|
||||
const __SIZE_TYPE__ *,
|
||||
const __SIZE_TYPE__ *,
|
||||
const __SIZE_TYPE__ *,
|
||||
const __SIZE_TYPE__ *, int, int, int,
|
||||
omp_depend_t *)
|
||||
__GOMP_NOTHROW;
|
||||
extern int omp_target_associate_ptr (const void *, const void *, __SIZE_TYPE__,
|
||||
__SIZE_TYPE__, int) __GOMP_NOTHROW;
|
||||
extern int omp_target_disassociate_ptr (const void *, int) __GOMP_NOTHROW;
|
||||
|
@ -798,6 +798,22 @@
|
||||
end function omp_target_memcpy
|
||||
end interface
|
||||
|
||||
interface
|
||||
function omp_target_memcpy_async (dst, src, length, dst_offset, &
|
||||
src_offset, dst_device_num, &
|
||||
src_device_num, depobj_count, &
|
||||
depobj_list) bind(c)
|
||||
use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
|
||||
import :: omp_depend_kind
|
||||
integer(c_int) :: omp_target_memcpy_async
|
||||
type(c_ptr), value :: dst, src
|
||||
integer(c_size_t), value :: length, dst_offset, src_offset
|
||||
integer(c_int), value :: dst_device_num, src_device_num, &
|
||||
depobj_count
|
||||
integer(omp_depend_kind), optional :: depobj_list(*)
|
||||
end function omp_target_memcpy_async
|
||||
end interface
|
||||
|
||||
interface
|
||||
function omp_target_memcpy_rect (dst,src,element_size, num_dims, &
|
||||
volume, dst_offsets, src_offsets, &
|
||||
@ -815,6 +831,30 @@
|
||||
end function omp_target_memcpy_rect
|
||||
end interface
|
||||
|
||||
interface
|
||||
function omp_target_memcpy_rect_async (dst,src,element_size, &
|
||||
num_dims, volume, &
|
||||
dst_offsets, src_offsets, &
|
||||
dst_dimensions, &
|
||||
src_dimensions, &
|
||||
dst_device_num, &
|
||||
src_device_num, &
|
||||
depobj_count, &
|
||||
depobj_list) bind(c)
|
||||
use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
|
||||
import :: omp_depend_kind
|
||||
integer(c_int) :: omp_target_memcpy_rect_async
|
||||
type(c_ptr), value :: dst, src
|
||||
integer(c_size_t), value :: element_size
|
||||
integer(c_int), value :: num_dims, dst_device_num, src_device_num, &
|
||||
depobj_count
|
||||
integer(c_size_t), intent(in) :: volume(*), dst_offsets(*), &
|
||||
src_offsets(*), dst_dimensions(*), &
|
||||
src_dimensions(*)
|
||||
integer(omp_depend_kind), optional :: depobj_list(*)
|
||||
end function omp_target_memcpy_rect_async
|
||||
end interface
|
||||
|
||||
interface
|
||||
function omp_target_associate_ptr (host_ptr, device_ptr, size, &
|
||||
device_offset, device_num) bind(c)
|
||||
|
@ -377,6 +377,22 @@
|
||||
end function omp_target_memcpy
|
||||
end interface
|
||||
|
||||
interface
|
||||
function omp_target_memcpy_async (dst, src, length, dst_offset, &
|
||||
& src_offset, dst_device_num, &
|
||||
& src_device_num, depobj_count, &
|
||||
& depobj_list) bind(c)
|
||||
use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
|
||||
import :: omp_depend_kind
|
||||
integer(c_int) :: omp_target_memcpy_async
|
||||
type(c_ptr), value :: dst, src
|
||||
integer(c_size_t), value :: length, dst_offset, src_offset
|
||||
integer(c_int), value :: dst_device_num, src_device_num
|
||||
integer(c_int), value :: depobj_count
|
||||
integer(omp_depend_kind), optional :: depobj_list(*)
|
||||
end function omp_target_memcpy_async
|
||||
end interface
|
||||
|
||||
interface
|
||||
function omp_target_memcpy_rect (dst,src,element_size, num_dims, &
|
||||
& volume, dst_offsets, &
|
||||
@ -396,6 +412,31 @@
|
||||
end function omp_target_memcpy_rect
|
||||
end interface
|
||||
|
||||
interface
|
||||
function omp_target_memcpy_rect_async (dst,src,element_size, &
|
||||
& num_dims, volume, &
|
||||
& dst_offsets, src_offsets, &
|
||||
& dst_dimensions, &
|
||||
& src_dimensions, &
|
||||
& dst_device_num, &
|
||||
& src_device_num, &
|
||||
& depobj_count, &
|
||||
& depobj_list) bind(c)
|
||||
use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
|
||||
import :: omp_depend_kind
|
||||
integer(c_int) :: omp_target_memcpy_rect_async
|
||||
type(c_ptr), value :: dst, src
|
||||
integer(c_size_t), value :: element_size
|
||||
integer(c_int), value :: num_dims, depobj_count
|
||||
integer(c_int), value :: dst_device_num, src_device_num
|
||||
integer(c_size_t), intent(in) :: volume(*), dst_offsets(*)
|
||||
integer(c_size_t), intent(in) :: src_offsets(*)
|
||||
integer(c_size_t), intent(in) :: dst_dimensions(*)
|
||||
integer(c_size_t), intent(in) :: src_dimensions(*)
|
||||
integer(omp_depend_kind), optional :: depobj_list(*)
|
||||
end function omp_target_memcpy_rect_async
|
||||
end interface
|
||||
|
||||
interface
|
||||
function omp_target_associate_ptr (host_ptr, device_ptr, size, &
|
||||
& device_offset, device_num) &
|
||||
|
290
libgomp/target.c
290
libgomp/target.c
@ -49,6 +49,8 @@ static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
|
||||
static inline void htab_free (void *ptr) { free (ptr); }
|
||||
#include "hashtab.h"
|
||||
|
||||
ialias_redirect (GOMP_task)
|
||||
|
||||
static inline hashval_t
|
||||
htab_hash (hash_entry_type element)
|
||||
{
|
||||
@ -3355,40 +3357,49 @@ omp_target_is_present (const void *ptr, int device_num)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
omp_target_memcpy (void *dst, const void *src, size_t length,
|
||||
size_t dst_offset, size_t src_offset, int dst_device_num,
|
||||
int src_device_num)
|
||||
static int
|
||||
omp_target_memcpy_check (int dst_device_num, int src_device_num,
|
||||
struct gomp_device_descr **dst_devicep,
|
||||
struct gomp_device_descr **src_devicep)
|
||||
{
|
||||
struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
|
||||
bool ret;
|
||||
|
||||
if (dst_device_num != gomp_get_num_devices ())
|
||||
{
|
||||
if (dst_device_num < 0)
|
||||
return EINVAL;
|
||||
|
||||
dst_devicep = resolve_device (dst_device_num);
|
||||
if (dst_devicep == NULL)
|
||||
*dst_devicep = resolve_device (dst_device_num);
|
||||
if (*dst_devicep == NULL)
|
||||
return EINVAL;
|
||||
|
||||
if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
||||
|| dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
||||
dst_devicep = NULL;
|
||||
if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
||||
|| (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
||||
*dst_devicep = NULL;
|
||||
}
|
||||
|
||||
if (src_device_num != num_devices_openmp)
|
||||
{
|
||||
if (src_device_num < 0)
|
||||
return EINVAL;
|
||||
|
||||
src_devicep = resolve_device (src_device_num);
|
||||
if (src_devicep == NULL)
|
||||
*src_devicep = resolve_device (src_device_num);
|
||||
if (*src_devicep == NULL)
|
||||
return EINVAL;
|
||||
|
||||
if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
||||
|| src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
||||
src_devicep = NULL;
|
||||
if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
||||
|| (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
||||
*src_devicep = NULL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
omp_target_memcpy_copy (void *dst, const void *src, size_t length,
|
||||
size_t dst_offset, size_t src_offset,
|
||||
struct gomp_device_descr *dst_devicep,
|
||||
struct gomp_device_descr *src_devicep)
|
||||
{
|
||||
bool ret;
|
||||
if (src_devicep == NULL && dst_devicep == NULL)
|
||||
{
|
||||
memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
|
||||
@ -3424,6 +3435,85 @@ omp_target_memcpy (void *dst, const void *src, size_t length,
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
int
|
||||
omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
|
||||
size_t src_offset, int dst_device_num, int src_device_num)
|
||||
{
|
||||
struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
|
||||
int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
|
||||
&dst_devicep, &src_devicep);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
|
||||
dst_devicep, src_devicep);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
void *dst;
|
||||
const void *src;
|
||||
size_t length;
|
||||
size_t dst_offset;
|
||||
size_t src_offset;
|
||||
struct gomp_device_descr *dst_devicep;
|
||||
struct gomp_device_descr *src_devicep;
|
||||
} omp_target_memcpy_data;
|
||||
|
||||
static void
|
||||
omp_target_memcpy_async_helper (void *args)
|
||||
{
|
||||
omp_target_memcpy_data *a = args;
|
||||
if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
|
||||
a->src_offset, a->dst_devicep, a->src_devicep))
|
||||
gomp_fatal ("omp_target_memcpy failed");
|
||||
}
|
||||
|
||||
int
|
||||
omp_target_memcpy_async (void *dst, const void *src, size_t length,
|
||||
size_t dst_offset, size_t src_offset,
|
||||
int dst_device_num, int src_device_num,
|
||||
int depobj_count, omp_depend_t *depobj_list)
|
||||
{
|
||||
struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
|
||||
unsigned int flags = 0;
|
||||
void *depend[depobj_count + 5];
|
||||
int i;
|
||||
int check = omp_target_memcpy_check (dst_device_num, src_device_num,
|
||||
&dst_devicep, &src_devicep);
|
||||
|
||||
omp_target_memcpy_data s = {
|
||||
.dst = dst,
|
||||
.src = src,
|
||||
.length = length,
|
||||
.dst_offset = dst_offset,
|
||||
.src_offset = src_offset,
|
||||
.dst_devicep = dst_devicep,
|
||||
.src_devicep = src_devicep
|
||||
};
|
||||
|
||||
if (check)
|
||||
return check;
|
||||
|
||||
if (depobj_count > 0 && depobj_list != NULL)
|
||||
{
|
||||
flags |= GOMP_TASK_FLAG_DEPEND;
|
||||
depend[0] = 0;
|
||||
depend[1] = (void *) (uintptr_t) depobj_count;
|
||||
depend[2] = depend[3] = depend[4] = 0;
|
||||
for (i = 0; i < depobj_count; ++i)
|
||||
depend[i + 5] = &depobj_list[i];
|
||||
}
|
||||
|
||||
GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
|
||||
__alignof__ (s), true, flags, depend, 0, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
|
||||
int num_dims, const size_t *volume,
|
||||
@ -3500,50 +3590,36 @@ omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
|
||||
int num_dims, const size_t *volume,
|
||||
const size_t *dst_offsets,
|
||||
const size_t *src_offsets,
|
||||
const size_t *dst_dimensions,
|
||||
const size_t *src_dimensions,
|
||||
int dst_device_num, int src_device_num)
|
||||
static int
|
||||
omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
|
||||
int src_device_num,
|
||||
struct gomp_device_descr **dst_devicep,
|
||||
struct gomp_device_descr **src_devicep)
|
||||
{
|
||||
struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
|
||||
|
||||
if (!dst && !src)
|
||||
return INT_MAX;
|
||||
|
||||
if (dst_device_num != gomp_get_num_devices ())
|
||||
{
|
||||
if (dst_device_num < 0)
|
||||
return EINVAL;
|
||||
int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
|
||||
dst_devicep, src_devicep);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
dst_devicep = resolve_device (dst_device_num);
|
||||
if (dst_devicep == NULL)
|
||||
return EINVAL;
|
||||
|
||||
if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
||||
|| dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
||||
dst_devicep = NULL;
|
||||
}
|
||||
if (src_device_num != num_devices_openmp)
|
||||
{
|
||||
if (src_device_num < 0)
|
||||
return EINVAL;
|
||||
|
||||
src_devicep = resolve_device (src_device_num);
|
||||
if (src_devicep == NULL)
|
||||
return EINVAL;
|
||||
|
||||
if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
||||
|| src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
||||
src_devicep = NULL;
|
||||
}
|
||||
|
||||
if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
|
||||
if (*src_devicep != NULL && *dst_devicep != NULL && *src_devicep != *dst_devicep)
|
||||
return EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
omp_target_memcpy_rect_copy (void *dst, const void *src,
|
||||
size_t element_size, int num_dims,
|
||||
const size_t *volume, const size_t *dst_offsets,
|
||||
const size_t *src_offsets,
|
||||
const size_t *dst_dimensions,
|
||||
const size_t *src_dimensions,
|
||||
struct gomp_device_descr *dst_devicep,
|
||||
struct gomp_device_descr *src_devicep)
|
||||
{
|
||||
if (src_devicep)
|
||||
gomp_mutex_lock (&src_devicep->lock);
|
||||
else if (dst_devicep)
|
||||
@ -3556,9 +3632,115 @@ omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
|
||||
gomp_mutex_unlock (&src_devicep->lock);
|
||||
else if (dst_devicep)
|
||||
gomp_mutex_unlock (&dst_devicep->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
|
||||
int num_dims, const size_t *volume,
|
||||
const size_t *dst_offsets,
|
||||
const size_t *src_offsets,
|
||||
const size_t *dst_dimensions,
|
||||
const size_t *src_dimensions,
|
||||
int dst_device_num, int src_device_num)
|
||||
{
|
||||
struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
|
||||
|
||||
int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
|
||||
src_device_num, &dst_devicep,
|
||||
&src_devicep);
|
||||
|
||||
if (check)
|
||||
return check;
|
||||
|
||||
int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
|
||||
volume, dst_offsets, src_offsets,
|
||||
dst_dimensions, src_dimensions,
|
||||
dst_devicep, src_devicep);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
void *dst;
|
||||
const void *src;
|
||||
size_t element_size;
|
||||
const size_t *volume;
|
||||
const size_t *dst_offsets;
|
||||
const size_t *src_offsets;
|
||||
const size_t *dst_dimensions;
|
||||
const size_t *src_dimensions;
|
||||
struct gomp_device_descr *dst_devicep;
|
||||
struct gomp_device_descr *src_devicep;
|
||||
int num_dims;
|
||||
} omp_target_memcpy_rect_data;
|
||||
|
||||
static void
|
||||
omp_target_memcpy_rect_async_helper (void *args)
|
||||
{
|
||||
omp_target_memcpy_rect_data *a = args;
|
||||
int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
|
||||
a->num_dims, a->volume, a->dst_offsets,
|
||||
a->src_offsets, a->dst_dimensions,
|
||||
a->src_dimensions, a->dst_devicep,
|
||||
a->src_devicep);
|
||||
if (ret)
|
||||
gomp_fatal ("omp_target_memcpy_rect failed");
|
||||
}
|
||||
|
||||
int
|
||||
omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
|
||||
int num_dims, const size_t *volume,
|
||||
const size_t *dst_offsets,
|
||||
const size_t *src_offsets,
|
||||
const size_t *dst_dimensions,
|
||||
const size_t *src_dimensions,
|
||||
int dst_device_num, int src_device_num,
|
||||
int depobj_count, omp_depend_t *depobj_list)
|
||||
{
|
||||
struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
|
||||
unsigned flags = 0;
|
||||
int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
|
||||
src_device_num, &dst_devicep,
|
||||
&src_devicep);
|
||||
void *depend[depobj_count + 5];
|
||||
int i;
|
||||
|
||||
omp_target_memcpy_rect_data s = {
|
||||
.dst = dst,
|
||||
.src = src,
|
||||
.element_size = element_size,
|
||||
.num_dims = num_dims,
|
||||
.volume = volume,
|
||||
.dst_offsets = dst_offsets,
|
||||
.src_offsets = src_offsets,
|
||||
.dst_dimensions = dst_dimensions,
|
||||
.src_dimensions = src_dimensions,
|
||||
.dst_devicep = dst_devicep,
|
||||
.src_devicep = src_devicep
|
||||
};
|
||||
|
||||
if (check)
|
||||
return check;
|
||||
|
||||
if (depobj_count > 0 && depobj_list != NULL)
|
||||
{
|
||||
flags |= GOMP_TASK_FLAG_DEPEND;
|
||||
depend[0] = 0;
|
||||
depend[1] = (void *) (uintptr_t) depobj_count;
|
||||
depend[2] = depend[3] = depend[4] = 0;
|
||||
for (i = 0; i < depobj_count; ++i)
|
||||
depend[i + 5] = &depobj_list[i];
|
||||
}
|
||||
|
||||
GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
|
||||
__alignof__ (s), true, flags, depend, 0, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
|
||||
size_t size, size_t device_offset, int device_num)
|
||||
|
@ -712,6 +712,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
|
||||
}
|
||||
}
|
||||
|
||||
ialias (GOMP_task)
|
||||
ialias (GOMP_taskgroup_start)
|
||||
ialias (GOMP_taskgroup_end)
|
||||
ialias (GOMP_taskgroup_reduction_register)
|
||||
|
@ -0,0 +1,46 @@
|
||||
/* Test for omp_target_memcpy_async without considering dependence objects. */
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int d = omp_get_default_device ();
|
||||
int id = omp_get_initial_device ();
|
||||
int q[128], i;
|
||||
void *p;
|
||||
|
||||
if (d < 0 || d >= omp_get_num_devices ())
|
||||
d = id;
|
||||
|
||||
p = omp_target_alloc (130 * sizeof (int), d);
|
||||
if (p == NULL)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < 128; i++)
|
||||
q[i] = i;
|
||||
|
||||
if (omp_target_memcpy_async (p, q, 128 * sizeof (int), sizeof (int), 0, d, id,
|
||||
0, NULL))
|
||||
abort ();
|
||||
|
||||
#pragma omp taskwait
|
||||
|
||||
int q2[128];
|
||||
for (i = 0; i < 128; ++i)
|
||||
q2[i] = 0;
|
||||
if (omp_target_memcpy_async (q2, p, 128 * sizeof(int), 0, sizeof (int), id, d,
|
||||
0, NULL))
|
||||
abort ();
|
||||
|
||||
#pragma omp taskwait
|
||||
|
||||
for (i = 0; i < 128; ++i)
|
||||
if (q2[i] != q[i])
|
||||
abort ();
|
||||
|
||||
omp_target_free (p, d);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,74 @@
|
||||
/* Test for omp_target_memcpy_async considering dependence objects. */
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int d = omp_get_default_device ();
|
||||
int id = omp_get_initial_device ();
|
||||
int a[128], b[64], c[32], e[16], q[128], i;
|
||||
void *p;
|
||||
|
||||
if (d < 0 || d >= omp_get_num_devices ())
|
||||
d = id;
|
||||
|
||||
p = omp_target_alloc (130 * sizeof (int), d);
|
||||
if (p == NULL)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < 128; ++i)
|
||||
a[i] = i + 1;
|
||||
for (i = 0; i < 64; ++i)
|
||||
b[i] = i + 2;
|
||||
for (i = 0; i < 32; i++)
|
||||
c[i] = 0;
|
||||
for (i = 0; i < 16; i++)
|
||||
e[i] = i + 4;
|
||||
|
||||
omp_depend_t obj[2];
|
||||
|
||||
#pragma omp parallel num_threads(5)
|
||||
#pragma omp single
|
||||
{
|
||||
#pragma omp task depend(out: p)
|
||||
omp_target_memcpy (p, a, 128 * sizeof (int), 0, 0, d, id);
|
||||
|
||||
#pragma omp task depend(inout: p)
|
||||
omp_target_memcpy (p, b, 64 * sizeof (int), 0, 0, d, id);
|
||||
|
||||
#pragma omp task depend(out: c)
|
||||
for (i = 0; i < 32; i++)
|
||||
c[i] = i + 3;
|
||||
|
||||
#pragma omp depobj(obj[0]) depend(inout: p)
|
||||
#pragma omp depobj(obj[1]) depend(in: c)
|
||||
omp_target_memcpy_async (p, c, 32 * sizeof (int), 0, 0, d, id, 2, obj);
|
||||
|
||||
#pragma omp task depend(in: p)
|
||||
omp_target_memcpy (p, e, 16 * sizeof (int), 0, 0, d, id);
|
||||
}
|
||||
|
||||
#pragma omp taskwait
|
||||
|
||||
for (i = 0; i < 128; ++i)
|
||||
q[i] = 0;
|
||||
omp_target_memcpy (q, p, 128 * sizeof(int), 0, 0, id, d);
|
||||
for (i = 0; i < 16; ++i)
|
||||
if (q[i] != i + 4)
|
||||
abort ();
|
||||
for (i = 16; i < 32; ++i)
|
||||
if (q[i] != i + 3)
|
||||
abort ();
|
||||
for (i = 32; i < 64; ++i)
|
||||
if (q[i] != i + 2)
|
||||
abort ();
|
||||
for (i = 64; i < 128; ++i)
|
||||
if (q[i] != i + 1)
|
||||
abort ();
|
||||
|
||||
omp_target_free (p, d);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,68 @@
|
||||
/* Test for omp_target_memcpy_rect_async without considering dependence
|
||||
objects. */
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define NUM_DIMS 3
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int d = omp_get_default_device ();
|
||||
int id = omp_get_initial_device ();
|
||||
int q[128], q2[128], i;
|
||||
void *p;
|
||||
|
||||
if (d < 0 || d >= omp_get_num_devices ())
|
||||
d = id;
|
||||
|
||||
p = omp_target_alloc (130 * sizeof (int), d);
|
||||
if (p == NULL)
|
||||
return 0;
|
||||
|
||||
if (omp_target_memcpy_rect_async (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL,
|
||||
NULL, d, id, 0, NULL) < 3
|
||||
|| omp_target_memcpy_rect_async (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL,
|
||||
NULL, id, d, 0, NULL) < 3
|
||||
|| omp_target_memcpy_rect_async (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL,
|
||||
NULL, id, id, 0, NULL) < 3)
|
||||
abort ();
|
||||
|
||||
for (i = 0; i < 128; i++)
|
||||
q[i] = 0;
|
||||
if (omp_target_memcpy (p, q, 128 * sizeof (int), 0, 0, d, id) != 0)
|
||||
abort ();
|
||||
|
||||
for (i = 0; i < 128; i++)
|
||||
q[i] = i + 1;
|
||||
|
||||
size_t volume[NUM_DIMS] = { 1, 2, 3 };
|
||||
size_t dst_offsets[NUM_DIMS] = { 0, 0, 0 };
|
||||
size_t src_offsets[NUM_DIMS] = { 0, 0, 0 };
|
||||
size_t dst_dimensions[NUM_DIMS] = { 3, 4, 5 };
|
||||
size_t src_dimensions[NUM_DIMS] = { 2, 3, 4 };
|
||||
|
||||
if (omp_target_memcpy_rect_async (p, q, sizeof (int), NUM_DIMS, volume,
|
||||
dst_offsets, src_offsets, dst_dimensions,
|
||||
src_dimensions, d, id, 0, NULL) != 0)
|
||||
abort ();
|
||||
|
||||
#pragma omp taskwait
|
||||
|
||||
for (i = 0; i < 128; i++)
|
||||
q2[i] = 0;
|
||||
if (omp_target_memcpy (q2, p, 128 * sizeof (int), 0, 0, id, d) != 0)
|
||||
abort ();
|
||||
|
||||
/* q2 is expected to contain: 1 2 3 0 0 5 6 7 0 0 .. 0 */
|
||||
if (q2[0] != 1 || q2[1] != 2 || q2[2] !=3 || q2[3] != 0 || q2[4] != 0
|
||||
|| q2[5] != 5 || q2[6] != 6 || q2[7] != 7)
|
||||
abort ();
|
||||
for (i = 8; i < 128; ++i)
|
||||
if (q2[i] != 0)
|
||||
abort ();
|
||||
|
||||
omp_target_free (p, d);
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,91 @@
|
||||
/* Test for omp_target_memcpy_rect_async considering dependence objects. */
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define NUM_DIMS 3
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int d = omp_get_default_device ();
|
||||
int id = omp_get_initial_device ();
|
||||
int a[128], b[64], c[128], e[16], q[128], i;
|
||||
void *p;
|
||||
|
||||
if (d < 0 || d >= omp_get_num_devices ())
|
||||
d = id;
|
||||
|
||||
p = omp_target_alloc (130 * sizeof (int), d);
|
||||
if (p == NULL)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < 128; i++)
|
||||
q[i] = 0;
|
||||
if (omp_target_memcpy (p, q, 128 * sizeof (int), 0, 0, d, id) != 0)
|
||||
abort ();
|
||||
|
||||
size_t volume[NUM_DIMS] = { 2, 2, 3 };
|
||||
size_t dst_offsets[NUM_DIMS] = { 0, 0, 0 };
|
||||
size_t src_offsets[NUM_DIMS] = { 0, 0, 0 };
|
||||
size_t dst_dimensions[NUM_DIMS] = { 3, 4, 5 };
|
||||
size_t src_dimensions[NUM_DIMS] = { 2, 3, 4 };
|
||||
|
||||
for (i = 0; i < 128; i++)
|
||||
a[i] = 42;
|
||||
for (i = 0; i < 64; i++)
|
||||
b[i] = 24;
|
||||
for (i = 0; i < 128; i++)
|
||||
c[i] = 0;
|
||||
for (i = 0; i < 16; i++)
|
||||
e[i] = 77;
|
||||
|
||||
omp_depend_t obj[2];
|
||||
|
||||
#pragma omp parallel num_threads(5)
|
||||
#pragma omp single
|
||||
{
|
||||
#pragma omp task depend (out: p)
|
||||
omp_target_memcpy (p, a, 128 * sizeof (int), 0, 0, d, id);
|
||||
|
||||
#pragma omp task depend(inout: p)
|
||||
omp_target_memcpy (p, b, 64 * sizeof (int), 0, 0, d, id);
|
||||
|
||||
#pragma omp task depend(out: c)
|
||||
for (i = 0; i < 128; i++)
|
||||
c[i] = i + 1;
|
||||
|
||||
#pragma omp depobj(obj[0]) depend(inout: p)
|
||||
#pragma omp depobj(obj[1]) depend(in: c)
|
||||
|
||||
/* This produces: 1 2 3 - - 5 6 7 - - at positions 0..9 and
|
||||
13 14 15 - - 17 18 19 - - at positions 20..29. */
|
||||
omp_target_memcpy_rect_async (p, c, sizeof (int), NUM_DIMS, volume,
|
||||
dst_offsets, src_offsets, dst_dimensions,
|
||||
src_dimensions, d, id, 2, obj);
|
||||
|
||||
#pragma omp task depend(in: p)
|
||||
omp_target_memcpy (p, e, 16 * sizeof (int), 0, 0, d, id);
|
||||
}
|
||||
|
||||
#pragma omp taskwait
|
||||
|
||||
if (omp_target_memcpy (q, p, 128 * sizeof(int), 0, 0, id, d) != 0)
|
||||
abort ();
|
||||
|
||||
for (i = 0; i < 16; ++i)
|
||||
if (q[i] != 77)
|
||||
abort ();
|
||||
if (q[20] != 13 || q[21] != 14 || q[22] != 15 || q[25] != 17 || q[26] != 18
|
||||
|| q[27] != 19)
|
||||
abort ();
|
||||
for (i = 28; i < 64; ++i)
|
||||
if (q[i] != 24)
|
||||
abort ();
|
||||
for (i = 64; i < 128; ++i)
|
||||
if (q[i] != 42)
|
||||
abort ();
|
||||
|
||||
omp_target_free (p, d);
|
||||
return 0;
|
||||
}
|
42
libgomp/testsuite/libgomp.fortran/target-memcpy-async-1.f90
Normal file
42
libgomp/testsuite/libgomp.fortran/target-memcpy-async-1.f90
Normal file
@ -0,0 +1,42 @@
|
||||
! Test for omp_target_memcpy_async without considering dependence objects.
|
||||
|
||||
program main
|
||||
use omp_lib
|
||||
use iso_c_binding
|
||||
implicit none (external, type)
|
||||
integer :: d, id, i, j
|
||||
integer, target :: q(0:127), q2(0:127)
|
||||
type(c_ptr) :: p
|
||||
integer(omp_depend_kind) :: obj(1:0)
|
||||
|
||||
d = omp_get_default_device ()
|
||||
id = omp_get_initial_device ()
|
||||
|
||||
if (d < 0 .or. d >= omp_get_num_devices ()) &
|
||||
d = id
|
||||
|
||||
p = omp_target_alloc (130 * c_sizeof (q), d)
|
||||
if (.not. c_associated (p)) &
|
||||
stop 0 ! okay
|
||||
|
||||
q = [(i, i = 0, 127)]
|
||||
if (omp_target_memcpy_async (p, c_loc (q), 128 * sizeof (q(0)), 0_c_size_t, &
|
||||
0_c_size_t, d, id, 0, obj) /= 0) &
|
||||
stop 1
|
||||
|
||||
!$omp taskwait
|
||||
|
||||
q2 = [(0, i = 0, 127)]
|
||||
if (omp_target_memcpy_async (c_loc (q2), p, 128 * sizeof (q2(0)), 0_c_size_t,&
|
||||
0_c_size_t, id, d, 0, obj) /= 0) &
|
||||
stop 2
|
||||
|
||||
!$omp taskwait
|
||||
|
||||
do j = 0, 127
|
||||
if (q(j) /= q2(j)) &
|
||||
stop 3
|
||||
end do
|
||||
|
||||
call omp_target_free (p, d)
|
||||
end program main
|
91
libgomp/testsuite/libgomp.fortran/target-memcpy-async-2.f90
Normal file
91
libgomp/testsuite/libgomp.fortran/target-memcpy-async-2.f90
Normal file
@ -0,0 +1,91 @@
|
||||
! Test for omp_target_memcpy_async considering dependence objects.
|
||||
|
||||
program main
|
||||
use omp_lib
|
||||
use iso_c_binding
|
||||
implicit none (external, type)
|
||||
integer :: d, id, i, j
|
||||
integer, target :: a(0:127), b(0:63), c(0:31), e(0:15), q(0:127)
|
||||
type(c_ptr) :: p
|
||||
integer(omp_depend_kind) :: obj(0:1)
|
||||
|
||||
d = omp_get_default_device ()
|
||||
id = omp_get_initial_device ()
|
||||
|
||||
if (d < 0 .or. d >= omp_get_num_devices ()) &
|
||||
d = id
|
||||
|
||||
p = omp_target_alloc (130 * c_sizeof (q), d)
|
||||
if (.not. c_associated (p)) &
|
||||
stop 0 ! okay
|
||||
|
||||
a = [(i + 1, i = 0, 127)]
|
||||
b = [(i + 2, i = 0, 63)]
|
||||
c = [(0, i = 0, 31)]
|
||||
e = [(i + 4, i = 0, 15)]
|
||||
|
||||
!$omp parallel num_threads(5)
|
||||
!$omp single
|
||||
|
||||
!$omp task depend(out: p)
|
||||
if (omp_target_memcpy (p, c_loc (a), 128 * sizeof (a(0)), 0_c_size_t, &
|
||||
0_c_size_t, d, id) /= 0) &
|
||||
stop 1
|
||||
!$omp end task
|
||||
|
||||
!$omp task depend(inout: p)
|
||||
if (omp_target_memcpy (p, c_loc (b), 64 * sizeof (b(0)), 0_c_size_t, &
|
||||
0_c_size_t, d, id) /= 0) &
|
||||
stop 2
|
||||
!$omp end task
|
||||
|
||||
!$omp task depend(out: c)
|
||||
do j = 0, 31
|
||||
c(j) = j + 3
|
||||
end do
|
||||
!$omp end task
|
||||
|
||||
!$omp depobj(obj(0)) depend(inout: p)
|
||||
!$omp depobj(obj(1)) depend(in: c)
|
||||
if (omp_target_memcpy_async (p, c_loc (c), 32 * sizeof (c(0)), 0_c_size_t, &
|
||||
0_c_size_t, d, id, 2, obj) /= 0) &
|
||||
stop 3
|
||||
|
||||
!$omp task depend(in: p)
|
||||
if (omp_target_memcpy (p, c_loc (e), 16 * sizeof (e(0)), 0_c_size_t, &
|
||||
0_c_size_t, d, id) /= 0) &
|
||||
stop 4
|
||||
!$omp end task
|
||||
|
||||
!$omp end single
|
||||
!$omp end parallel
|
||||
|
||||
!$omp taskwait
|
||||
|
||||
q = [(0, i = 0, 127)]
|
||||
if (omp_target_memcpy (c_loc (q), p, 128 * sizeof (q(0)), 0_c_size_t, &
|
||||
0_c_size_t, id, d) /= 0) &
|
||||
stop 5
|
||||
|
||||
do j = 0, 15
|
||||
if (q(j) /= j+4) &
|
||||
stop 10
|
||||
end do
|
||||
|
||||
do j = 16, 31
|
||||
if (q(j) /= j+3) &
|
||||
stop 11
|
||||
end do
|
||||
|
||||
do j = 32, 63
|
||||
if (q(j) /= j+2) &
|
||||
stop 12
|
||||
end do
|
||||
|
||||
do j = 64, 127
|
||||
if (q(j) /= j+1) &
|
||||
stop 13
|
||||
end do
|
||||
|
||||
call omp_target_free (p, d)
|
||||
end program main
|
@ -0,0 +1,86 @@
|
||||
! Test for omp_target_memcpy_rect_async without considering dependence objects.
|
||||
|
||||
program main
|
||||
use omp_lib
|
||||
use iso_c_binding
|
||||
implicit none (external, type)
|
||||
integer :: d, id, i, j
|
||||
integer, target :: q(0:127), q2(0:127)
|
||||
type(c_ptr) :: p
|
||||
integer(omp_depend_kind) :: obj(1:0)
|
||||
|
||||
integer(kind=c_size_t) :: volume(0:2)
|
||||
integer(kind=c_size_t) :: dst_offsets(0:2)
|
||||
integer(kind=c_size_t) :: src_offsets(0:2)
|
||||
integer(kind=c_size_t) :: dst_dimensions(0:2)
|
||||
integer(kind=c_size_t) :: src_dimensions(0:2)
|
||||
integer(kind=c_size_t) :: empty(1:0)
|
||||
|
||||
d = omp_get_default_device ()
|
||||
id = omp_get_initial_device ()
|
||||
|
||||
if (d < 0 .or. d >= omp_get_num_devices ()) &
|
||||
d = id
|
||||
|
||||
p = omp_target_alloc (130 * c_sizeof (q), d)
|
||||
if (.not. c_associated (p)) &
|
||||
stop 0 ! okay
|
||||
|
||||
if (omp_target_memcpy_rect_async (C_NULL_PTR, C_NULL_PTR, 0_c_size_t, 0, &
|
||||
empty, empty, empty, empty, empty, d, id, &
|
||||
0, obj) < 3 &
|
||||
.or. omp_target_memcpy_rect_async (C_NULL_PTR, C_NULL_PTR, 0_c_size_t, 0, &
|
||||
empty, empty, empty, empty, empty, &
|
||||
id, d, 0, obj) < 3 &
|
||||
.or. omp_target_memcpy_rect_async (C_NULL_PTR, C_NULL_PTR, 0_c_size_t, 0, &
|
||||
empty, empty, empty, empty, empty, &
|
||||
id, id, 0, obj) < 3) &
|
||||
stop 1
|
||||
|
||||
q = [(0, i = 0, 127)]
|
||||
if (omp_target_memcpy (p, c_loc (q), 128 * sizeof (q(0)), 0_c_size_t, &
|
||||
0_c_size_t, d, id) /= 0) &
|
||||
stop 2
|
||||
|
||||
q = [(i+1, i = 0, 127)]
|
||||
|
||||
volume(2) = 3
|
||||
volume(1) = 2
|
||||
volume(0) = 1
|
||||
dst_offsets(2) = 0
|
||||
dst_offsets(1) = 0
|
||||
dst_offsets(0) = 0
|
||||
src_offsets(2) = 0
|
||||
src_offsets(1) = 0
|
||||
src_offsets(0) = 0
|
||||
dst_dimensions(2) = 5
|
||||
dst_dimensions(1) = 4
|
||||
dst_dimensions(0) = 3
|
||||
src_dimensions(2) = 4
|
||||
src_dimensions(1) = 3
|
||||
src_dimensions(0) = 2
|
||||
|
||||
if (omp_target_memcpy_rect_async (p, c_loc (q), sizeof (q(0)), 3, volume, &
|
||||
dst_offsets, src_offsets, dst_dimensions, src_dimensions, d, id, 0, &
|
||||
obj) /= 0) &
|
||||
stop 3
|
||||
|
||||
!$omp taskwait
|
||||
|
||||
q2 = [(0, i = 0, 127)]
|
||||
if (omp_target_memcpy (c_loc (q2), p, 128 * sizeof (q2(0)), 0_c_size_t, &
|
||||
0_c_size_t, id, d) /= 0) &
|
||||
stop 4
|
||||
|
||||
! q2 is expected to contain: 1 2 3 0 0 5 6 7 0 0 .. 0
|
||||
if (q2(0) /= 1 .or. q2(1) /= 2 .or. q2(2) /= 3 .or. q2(3) /= 0 &
|
||||
.or. q2(4) /= 0 .or. q2(5) /= 5 .or. q2(6) /= 6 .or. q2(7) /= 7) &
|
||||
stop 5
|
||||
|
||||
do j = 8, 127
|
||||
if (q2(j) /= 0) &
|
||||
stop 6
|
||||
end do
|
||||
|
||||
call omp_target_free (p, d)
|
||||
end program main
|
117
libgomp/testsuite/libgomp.fortran/target-memcpy-rect-async-2.f90
Normal file
117
libgomp/testsuite/libgomp.fortran/target-memcpy-rect-async-2.f90
Normal file
@ -0,0 +1,117 @@
|
||||
! Test for omp_target_memcpy_rect_async considering dependence objects.
|
||||
|
||||
program main
|
||||
use omp_lib
|
||||
use iso_c_binding
|
||||
implicit none (external, type)
|
||||
integer :: d, id, i, j
|
||||
integer, target :: a(0:127), b(0:63), c(0:127), e(0:15), q(0:127)
|
||||
type(c_ptr) :: p
|
||||
integer(omp_depend_kind) :: obj(0:2)
|
||||
|
||||
integer(kind=c_size_t) :: volume(0:2)
|
||||
integer(kind=c_size_t) :: dst_offsets(0:2)
|
||||
integer(kind=c_size_t) :: src_offsets(0:2)
|
||||
integer(kind=c_size_t) :: dst_dimensions(0:2)
|
||||
integer(kind=c_size_t) :: src_dimensions(0:2)
|
||||
|
||||
d = omp_get_default_device ()
|
||||
id = omp_get_initial_device ()
|
||||
|
||||
if (d < 0 .or. d >= omp_get_num_devices ()) &
|
||||
d = id
|
||||
|
||||
p = omp_target_alloc (130 * c_sizeof (q), d)
|
||||
if (.not. c_associated (p)) &
|
||||
stop 0 ! okay
|
||||
|
||||
a = [(42, i = 0, 127)]
|
||||
b = [(24, i = 0, 63)]
|
||||
c = [(0, i = 0, 127)]
|
||||
e = [(77, i = 0, 15)]
|
||||
|
||||
volume(2) = 3
|
||||
volume(1) = 2
|
||||
volume(0) = 2
|
||||
dst_offsets(2) = 0
|
||||
dst_offsets(1) = 0
|
||||
dst_offsets(0) = 0
|
||||
src_offsets(2) = 0
|
||||
src_offsets(1) = 0
|
||||
src_offsets(0) = 0
|
||||
dst_dimensions(2) = 5
|
||||
dst_dimensions(1) = 4
|
||||
dst_dimensions(0) = 3
|
||||
src_dimensions(2) = 4
|
||||
src_dimensions(1) = 3
|
||||
src_dimensions(0) = 2
|
||||
|
||||
!$omp parallel num_threads(5)
|
||||
!$omp single
|
||||
|
||||
!$omp task depend(out: p)
|
||||
if (omp_target_memcpy (p, c_loc (a), 128 * sizeof (a(0)), 0_c_size_t, &
|
||||
0_c_size_t, d, id) /= 0) &
|
||||
stop 1
|
||||
!$omp end task
|
||||
|
||||
!$omp task depend(inout: p)
|
||||
if (omp_target_memcpy (p, c_loc (b), 64 * sizeof (b(0)), 0_c_size_t, &
|
||||
0_c_size_t, d, id) /= 0) &
|
||||
stop 2
|
||||
!$omp end task
|
||||
|
||||
!$omp task depend(out: c)
|
||||
do j = 0, 127
|
||||
c(j) = j + 1
|
||||
end do
|
||||
!$omp end task
|
||||
|
||||
!$omp depobj(obj(0)) depend(inout: p)
|
||||
!$omp depobj(obj(1)) depend(in: c)
|
||||
|
||||
! This produces: 1 2 3 - - 5 6 7 - - at positions 0..9 and
|
||||
! 13 14 15 - - 17 18 19 - - at positions 20..29.
|
||||
if (omp_target_memcpy_rect_async (p, c_loc (c), sizeof (c(0)), 3, volume, &
|
||||
dst_offsets, src_offsets, &
|
||||
dst_dimensions, src_dimensions, d, id, &
|
||||
2, obj) /= 0) &
|
||||
stop 3
|
||||
|
||||
!$omp task depend(in: p)
|
||||
if (omp_target_memcpy (p, c_loc (e), 16 * sizeof (e(0)), 0_c_size_t, &
|
||||
0_c_size_t, d, id) /= 0) &
|
||||
stop 4
|
||||
!$omp end task
|
||||
|
||||
!$omp end single
|
||||
!$omp end parallel
|
||||
|
||||
!$omp taskwait
|
||||
|
||||
q = [(0, i = 0, 127)]
|
||||
if (omp_target_memcpy (c_loc (q), p, 128 * sizeof (q(0)), 0_c_size_t, &
|
||||
0_c_size_t, id, d) /= 0) &
|
||||
stop 5
|
||||
|
||||
do j = 0, 15
|
||||
if (q(j) /= 77) &
|
||||
stop 6
|
||||
end do
|
||||
|
||||
if (q(20) /= 13 .or. q(21) /= 14 .or. q(22) /= 15 .or. q(25) /= 17 &
|
||||
.or. q(26) /= 18 .or. q(27) /= 19) &
|
||||
stop 7
|
||||
|
||||
do j = 28, 63
|
||||
if (q(j) /= 24) &
|
||||
stop 8
|
||||
end do
|
||||
|
||||
do j = 64, 127
|
||||
if (q(j) /= 42) &
|
||||
stop 9
|
||||
end do
|
||||
|
||||
call omp_target_free (p, d)
|
||||
end program main
|
Loading…
Reference in New Issue
Block a user