/* Copyright (C) 2013-2024 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <stdio.h>  /* For snprintf.  */
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

/* Define another splay tree instantiation - for reverse offload.  */
#define splay_tree_prefix reverse
#define splay_tree_static
#define splay_tree_c
#include "splay-tree.h"


typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

ialias_redirect (GOMP_task)

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Mask of OpenMP 'requires' directive clauses.  */
static int omp_requires_mask;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

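/* Translate DEVICE_ID into a device descriptor.  REMAPPED is true when
   DEVICE_ID uses the GOMP_DEVICE_* encoding instead of a plain OpenMP
   device number.  Returns NULL for host fallback; reports a fatal error
   when OMP_TARGET_OFFLOAD=mandatory cannot be honoured.  The returned
   device has been initialized if it was still uninitialized.  */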
static struct gomp_device_descr *
resolve_device (int device_id, bool remapped)
{
  /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
     called, which must be done before using default_device_var.  */
  int num_devices = gomp_get_num_devices ();

  if (remapped && device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
      remapped = false;
    }

  if (device_id < 0)
    {
      if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
                                 : omp_initial_device))
        return NULL;
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && num_devices == 0)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but only the host device is available");
      else if (device_id == omp_invalid_device)
        gomp_fatal ("omp_invalid_device encountered");
      else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }
  else if (device_id >= num_devices)
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && device_id != num_devices)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

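/* Look up KEY in the device's splay tree MEM_MAP.  For a zero-length KEY
   (host_start == host_end) the address may legitimately sit at the very end
   of an existing mapping, so also probe with the end extended by one byte
   and with the start moved back by one byte before falling back to a plain
   lookup.  */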
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline reverse_splay_tree_key
gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
{
  return reverse_splay_tree_lookup (mem_map_rev, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         const void *srcaddr_orig,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " via buffer %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr_orig, srcaddr_orig + size,
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
      else
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
   host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

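/* As an illustration of the limits above: two regions at tgt_start-relative
   offsets [0, 1024) and [2048, 3072) end up in one coalesced chunk [0, 3072),
   because the 1 KiB gap between them is smaller than MAX_COALESCE_BUF_GAP,
   whereas a single 64 KiB region exceeds MAX_COALESCE_BUF_SIZE and is copied
   directly to the device instead of going through the buffer.  */
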
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).  The exception is for EPHEMERAL data, that we know is available
   already "by construction".  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

/* Return true for mapping kinds which need to copy data from the
|
|
host to device for regions that weren't previously mapped. */
|
|
|
|
static inline bool
|
|
gomp_to_device_kind_p (int kind)
|
|
{
|
|
switch (kind)
|
|
{
|
|
case GOMP_MAP_ALLOC:
|
|
case GOMP_MAP_FROM:
|
|
case GOMP_MAP_FORCE_ALLOC:
|
|
case GOMP_MAP_FORCE_FROM:
|
|
case GOMP_MAP_ALWAYS_FROM:
|
|
case GOMP_MAP_ALWAYS_PRESENT_FROM:
|
|
case GOMP_MAP_FORCE_PRESENT:
|
|
return false;
|
|
default:
|
|
return true;
|
|
}
|
|
}
|
|
|
|
/* Copy host memory to an offload device. In asynchronous mode (if AQ is
|
|
non-NULL), when the source data is stack or may otherwise be deallocated
|
|
before the asynchronous copy takes place, EPHEMERAL must be passed as
|
|
TRUE. */
|
|
|
|
attribute_hidden void
|
|
gomp_copy_host2dev (struct gomp_device_descr *devicep,
|
|
struct goacc_asyncqueue *aq,
|
|
void *d, const void *h, size_t sz,
|
|
bool ephemeral, struct gomp_coalesce_buf *cbuf)
|
|
{
|
|
if (cbuf)
|
|
{
|
|
uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
|
|
if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
|
|
{
|
|
long first = 0;
|
|
long last = cbuf->chunk_cnt - 1;
|
|
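/* Binary search the chunks array for the chunk containing DOFF; the chunks
   are kept sorted by their tgt_start-relative offsets, so the search range
   is halved depending on whether DOFF lies before or after the middle
   chunk.  */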
while (first <= last)
|
|
{
|
|
long middle = (first + last) >> 1;
|
|
if (cbuf->chunks[middle].end <= doff)
|
|
first = middle + 1;
|
|
else if (cbuf->chunks[middle].start <= doff)
|
|
{
|
|
if (doff + sz > cbuf->chunks[middle].end)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("internal libgomp cbuf error");
|
|
}
|
|
|
|
/* In an asynchronous context, verify that CBUF isn't used
|
|
with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'. */
|
|
if (__builtin_expect (aq != NULL, 0))
|
|
assert (ephemeral);
|
|
|
|
memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
|
|
h, sz);
|
|
return;
|
|
}
|
|
else
|
|
last = middle - 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (__builtin_expect (aq != NULL, 0))
|
|
{
|
|
void *h_buf = (void *) h;
|
|
if (ephemeral)
|
|
{
|
|
/* We're queueing up an asynchronous copy from data that may
|
|
disappear before the transfer takes place (i.e. because it is a
|
|
stack local in a function that is no longer executing). As we've
|
|
not been able to use CBUF, make a copy of the data into a
|
|
temporary buffer. */
|
|
h_buf = gomp_malloc (sz);
|
|
memcpy (h_buf, h, sz);
|
|
}
|
|
goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
|
|
"dev", d, "host", h_buf, h, sz, aq);
|
|
if (ephemeral)
|
|
/* Free once the transfer has completed. */
|
|
devicep->openacc.async.queue_callback_func (aq, free, h_buf);
|
|
}
|
|
else
|
|
gomp_device_copy (devicep, devicep->host2dev_func,
|
|
"dev", d, "host", h, sz);
|
|
}
|
|
|
|
attribute_hidden void
|
|
gomp_copy_dev2host (struct gomp_device_descr *devicep,
|
|
struct goacc_asyncqueue *aq,
|
|
void *h, const void *d, size_t sz)
|
|
{
|
|
if (__builtin_expect (aq != NULL, 0))
|
|
goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
|
|
"host", h, "dev", d, NULL, sz, aq);
|
|
else
|
|
gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
|
|
}
|
|
|
|
static void
|
|
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
|
|
{
|
|
if (!devicep->free_func (devicep->target_id, devptr))
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("error in freeing device memory block at %p", devptr);
|
|
}
|
|
}
|
|
|
|
/* Increment reference count of a splay_tree_key region K by 1.
|
|
If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
|
|
increment the value if refcount is not yet contained in the set (used for
|
|
OpenMP 5.0, which specifies that a region's refcount is adjusted at most
|
|
once for each construct). */
|
|
|
|
static inline void
|
|
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
|
|
{
|
|
if (k == NULL
|
|
|| k->refcount == REFCOUNT_INFINITY
|
|
|| k->refcount == REFCOUNT_ACC_MAP_DATA)
|
|
return;
|
|
|
|
uintptr_t *refcount_ptr = &k->refcount;
|
|
|
|
if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
|
|
refcount_ptr = &k->structelem_refcount;
|
|
else if (REFCOUNT_STRUCTELEM_P (k->refcount))
|
|
refcount_ptr = k->structelem_refcount_ptr;
|
|
|
|
if (refcount_set)
|
|
{
|
|
if (htab_find (*refcount_set, refcount_ptr))
|
|
return;
|
|
uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
|
|
*slot = refcount_ptr;
|
|
}
|
|
|
|
*refcount_ptr += 1;
|
|
return;
|
|
}
|
|
|
|
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
|
|
is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
|
|
track already seen refcounts, and only adjust the value if refcount is not
|
|
yet contained in the set (like gomp_increment_refcount).
|
|
|
|
Return out-values: set *DO_COPY to true if we set the refcount to zero, or
|
|
it is already zero and we know we decremented it earlier. This signals that
|
|
associated maps should be copied back to host.
|
|
|
|
   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like the handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */
|
|
|
|
static inline void
|
|
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
|
|
bool *do_copy, bool *do_remove)
|
|
{
|
|
if (k == NULL
|
|
|| k->refcount == REFCOUNT_INFINITY
|
|
|| k->refcount == REFCOUNT_ACC_MAP_DATA)
|
|
{
|
|
*do_copy = *do_remove = false;
|
|
return;
|
|
}
|
|
|
|
uintptr_t *refcount_ptr = &k->refcount;
|
|
|
|
if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
|
|
refcount_ptr = &k->structelem_refcount;
|
|
else if (REFCOUNT_STRUCTELEM_P (k->refcount))
|
|
refcount_ptr = k->structelem_refcount_ptr;
|
|
|
|
bool new_encountered_refcount;
|
|
bool set_to_zero = false;
|
|
bool is_zero = false;
|
|
|
|
uintptr_t orig_refcount = *refcount_ptr;
|
|
|
|
if (refcount_set)
|
|
{
|
|
if (htab_find (*refcount_set, refcount_ptr))
|
|
{
|
|
new_encountered_refcount = false;
|
|
goto end;
|
|
}
|
|
|
|
uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
|
|
*slot = refcount_ptr;
|
|
new_encountered_refcount = true;
|
|
}
|
|
else
|
|
/* If no refcount_set being used, assume all keys are being decremented
|
|
for the first time. */
|
|
new_encountered_refcount = true;
|
|
|
|
if (delete_p)
|
|
*refcount_ptr = 0;
|
|
else if (*refcount_ptr > 0)
|
|
*refcount_ptr -= 1;
|
|
|
|
end:
|
|
if (*refcount_ptr == 0)
|
|
{
|
|
if (orig_refcount > 0)
|
|
set_to_zero = true;
|
|
|
|
is_zero = true;
|
|
}
|
|
|
|
*do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
|
|
*do_remove = (new_encountered_refcount && set_to_zero);
|
|
}
|
|
|
|
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
|
|
gomp_map_0len_lookup found oldn for newn.
|
|
Helper function of gomp_map_vars. */
|
|
|
|
static inline void
|
|
gomp_map_vars_existing (struct gomp_device_descr *devicep,
|
|
struct goacc_asyncqueue *aq, splay_tree_key oldn,
|
|
splay_tree_key newn, struct target_var_desc *tgt_var,
|
|
unsigned char kind, bool always_to_flag, bool implicit,
|
|
struct gomp_coalesce_buf *cbuf,
|
|
htab_t *refcount_set)
|
|
{
|
|
assert (kind != GOMP_MAP_ATTACH
|
|
|| kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
|
|
|
|
tgt_var->key = oldn;
|
|
tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
|
|
tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
|
|
tgt_var->is_attach = false;
|
|
tgt_var->offset = newn->host_start - oldn->host_start;
|
|
|
|
/* For implicit maps, old contained in new is valid. */
|
|
bool implicit_subset = (implicit
|
|
&& newn->host_start <= oldn->host_start
|
|
&& oldn->host_end <= newn->host_end);
|
|
if (implicit_subset)
|
|
tgt_var->length = oldn->host_end - oldn->host_start;
|
|
else
|
|
tgt_var->length = newn->host_end - newn->host_start;
|
|
|
|
if (GOMP_MAP_FORCE_P (kind)
|
|
/* For implicit maps, old contained in new is valid. */
|
|
|| !(implicit_subset
|
|
/* Otherwise, new contained inside old is considered valid. */
|
|
|| (oldn->host_start <= newn->host_start
|
|
&& newn->host_end <= oldn->host_end)))
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("Trying to map into device [%p..%p) object when "
|
|
"[%p..%p) is already mapped",
|
|
(void *) newn->host_start, (void *) newn->host_end,
|
|
(void *) oldn->host_start, (void *) oldn->host_end);
|
|
}
|
|
|
|
if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
|
|
{
|
|
/* Implicit + always should not happen.  If this does occur, the
   address/length adjustment below is a TODO.  */
|
|
assert (!implicit_subset);
|
|
|
|
if (oldn->aux && oldn->aux->attach_count)
|
|
{
|
|
/* We have to be careful not to overwrite still attached pointers
|
|
during the copyback to host. */
|
|
uintptr_t addr = newn->host_start;
|
|
while (addr < newn->host_end)
|
|
{
|
|
size_t i = (addr - oldn->host_start) / sizeof (void *);
|
|
if (oldn->aux->attach_count[i] == 0)
|
|
gomp_copy_host2dev (devicep, aq,
|
|
(void *) (oldn->tgt->tgt_start
|
|
+ oldn->tgt_offset
|
|
+ addr - oldn->host_start),
|
|
(void *) addr,
|
|
sizeof (void *), false, cbuf);
|
|
addr += sizeof (void *);
|
|
}
|
|
}
|
|
else
|
|
gomp_copy_host2dev (devicep, aq,
|
|
(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
|
|
+ newn->host_start - oldn->host_start),
|
|
(void *) newn->host_start,
|
|
newn->host_end - newn->host_start, false, cbuf);
|
|
}
|
|
|
|
gomp_increment_refcount (oldn, refcount_set);
|
|
}
|
|
|
|
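/* Extract the map kind for entry IDX from KINDS.  With SHORT_MAPKIND the
   kinds array holds 16-bit values whose low bits encode the GOMP_MAP_* kind
   (possibly carrying the GOMP_MAP_IMPLICIT flag, which is stripped here);
   otherwise it holds plain 8-bit kinds.  */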
static int
|
|
get_kind (bool short_mapkind, void *kinds, int idx)
|
|
{
|
|
if (!short_mapkind)
|
|
return ((unsigned char *) kinds)[idx];
|
|
|
|
int val = ((unsigned short *) kinds)[idx];
|
|
if (GOMP_MAP_IMPLICIT_P (val))
|
|
val &= ~GOMP_MAP_IMPLICIT;
|
|
return val;
|
|
}
|
|
|
|
|
|
static bool
|
|
get_implicit (bool short_mapkind, void *kinds, int idx)
|
|
{
|
|
if (!short_mapkind)
|
|
return false;
|
|
|
|
int val = ((unsigned short *) kinds)[idx];
|
|
return GOMP_MAP_IMPLICIT_P (val);
|
|
}
|
|
|
|
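/* Write the device address corresponding to HOST_PTR (after adding BIAS)
   into the device pointer located at TGT's device memory offset
   TARGET_OFFSET.  A NULL host pointer is mapped to a NULL device pointer;
   an unmapped pointer target is a fatal error, unless
   ALLOW_ZERO_LENGTH_ARRAY_SECTIONS, in which case the (biased) host pointer
   value is written unchanged.  */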
static void
|
|
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
|
|
uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
|
|
struct gomp_coalesce_buf *cbuf,
|
|
bool allow_zero_length_array_sections)
|
|
{
|
|
struct gomp_device_descr *devicep = tgt->device_descr;
|
|
struct splay_tree_s *mem_map = &devicep->mem_map;
|
|
struct splay_tree_key_s cur_node;
|
|
|
|
cur_node.host_start = host_ptr;
|
|
if (cur_node.host_start == (uintptr_t) NULL)
|
|
{
|
|
cur_node.tgt_offset = (uintptr_t) NULL;
|
|
gomp_copy_host2dev (devicep, aq,
|
|
(void *) (tgt->tgt_start + target_offset),
|
|
(void *) &cur_node.tgt_offset, sizeof (void *),
|
|
true, cbuf);
|
|
return;
|
|
}
|
|
/* Add bias to the pointer value. */
|
|
cur_node.host_start += bias;
|
|
cur_node.host_end = cur_node.host_start;
|
|
splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
|
|
if (n == NULL)
|
|
{
|
|
if (allow_zero_length_array_sections)
|
|
cur_node.tgt_offset = cur_node.host_start;
|
|
else
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("Pointer target of array section wasn't mapped");
|
|
}
|
|
}
|
|
else
|
|
{
|
|
cur_node.host_start -= n->host_start;
|
|
cur_node.tgt_offset
|
|
= n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
|
|
/* At this point tgt_offset is the target address of the
   array section.  Now subtract bias to get what we want
   to initialize the pointer with.  */
|
|
cur_node.tgt_offset -= bias;
|
|
}
|
|
gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
|
|
(void *) &cur_node.tgt_offset, sizeof (void *),
|
|
true, cbuf);
|
|
}
|
|
|
|
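/* Map structure element number I of a GOMP_MAP_STRUCT sequence whose
   enclosing structure mapping was found as N; FIRST is the index of the
   first element of the sequence.  Every element must already be mapped in
   the same target_mem_desc as N, otherwise a fatal error is reported.  */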
static void
|
|
gomp_map_fields_existing (struct target_mem_desc *tgt,
|
|
struct goacc_asyncqueue *aq, splay_tree_key n,
|
|
size_t first, size_t i, void **hostaddrs,
|
|
size_t *sizes, void *kinds,
|
|
struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
|
|
{
|
|
struct gomp_device_descr *devicep = tgt->device_descr;
|
|
struct splay_tree_s *mem_map = &devicep->mem_map;
|
|
struct splay_tree_key_s cur_node;
|
|
int kind;
|
|
bool implicit;
|
|
const bool short_mapkind = true;
|
|
const int typemask = short_mapkind ? 0xff : 0x7;
|
|
|
|
cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
cur_node.host_end = cur_node.host_start + sizes[i];
|
|
splay_tree_key n2 = gomp_map_0len_lookup (mem_map, &cur_node);
|
|
kind = get_kind (short_mapkind, kinds, i);
|
|
implicit = get_implicit (short_mapkind, kinds, i);
|
|
if (n2
|
|
&& n2->tgt == n->tgt
|
|
&& n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
|
|
{
|
|
gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
|
|
kind & typemask, false, implicit, cbuf,
|
|
refcount_set);
|
|
return;
|
|
}
|
|
if (sizes[i] == 0)
|
|
{
|
|
if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
|
|
{
|
|
cur_node.host_start--;
|
|
n2 = splay_tree_lookup (mem_map, &cur_node);
|
|
cur_node.host_start++;
|
|
if (n2
|
|
&& n2->tgt == n->tgt
|
|
&& n2->host_start - n->host_start
|
|
== n2->tgt_offset - n->tgt_offset)
|
|
{
|
|
gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
|
|
kind & typemask, false, implicit, cbuf,
|
|
refcount_set);
|
|
return;
|
|
}
|
|
}
|
|
cur_node.host_end++;
|
|
n2 = splay_tree_lookup (mem_map, &cur_node);
|
|
cur_node.host_end--;
|
|
if (n2
|
|
&& n2->tgt == n->tgt
|
|
&& n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
|
|
{
|
|
gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
|
|
kind & typemask, false, implicit, cbuf,
|
|
refcount_set);
|
|
return;
|
|
}
|
|
}
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("Trying to map into device [%p..%p) structure element when "
|
|
"other mapped elements from the same structure weren't mapped "
|
|
"together with it", (void *) cur_node.host_start,
|
|
(void *) cur_node.host_end);
|
|
}
|
|
|
|
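/* Perform an OpenACC-style pointer attachment: record one more attachment
   for the pointer at host address ATTACH_TO inside the mapped region N, and
   on the first attachment overwrite the corresponding device pointer with
   the device address of its target (offset by BIAS).  Attachment counts are
   kept per pointer-sized slot of the enclosing mapping.  */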
attribute_hidden void
|
|
gomp_attach_pointer (struct gomp_device_descr *devicep,
|
|
struct goacc_asyncqueue *aq, splay_tree mem_map,
|
|
splay_tree_key n, uintptr_t attach_to, size_t bias,
|
|
struct gomp_coalesce_buf *cbufp,
|
|
bool allow_zero_length_array_sections)
|
|
{
|
|
struct splay_tree_key_s s;
|
|
size_t size, idx;
|
|
|
|
if (n == NULL)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("enclosing struct not mapped for attach");
|
|
}
|
|
|
|
size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
|
|
/* We might have a pointer in a packed struct: however we cannot have more
|
|
than one such pointer in each pointer-sized portion of the struct, so
|
|
this is safe. */
|
|
idx = (attach_to - n->host_start) / sizeof (void *);
|
|
|
|
if (!n->aux)
|
|
n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
|
|
|
|
if (!n->aux->attach_count)
|
|
n->aux->attach_count
|
|
= gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
|
|
|
|
if (n->aux->attach_count[idx] < UINTPTR_MAX)
|
|
n->aux->attach_count[idx]++;
|
|
else
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("attach count overflow");
|
|
}
|
|
|
|
if (n->aux->attach_count[idx] == 1)
|
|
{
|
|
uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
|
|
- n->host_start;
|
|
uintptr_t target = (uintptr_t) *(void **) attach_to;
|
|
splay_tree_key tn;
|
|
uintptr_t data;
|
|
|
|
if ((void *) target == NULL)
|
|
{
|
|
/* As a special case, allow attaching NULL host pointers. This
|
|
allows e.g. unassociated Fortran pointers to be mapped
|
|
properly. */
|
|
data = 0;
|
|
|
|
gomp_debug (1,
|
|
"%s: attaching NULL host pointer, target %p "
|
|
"(struct base %p)\n", __FUNCTION__, (void *) devptr,
|
|
(void *) (n->tgt->tgt_start + n->tgt_offset));
|
|
|
|
gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
|
|
sizeof (void *), true, cbufp);
|
|
|
|
return;
|
|
}
|
|
|
|
s.host_start = target + bias;
|
|
s.host_end = s.host_start + 1;
|
|
tn = splay_tree_lookup (mem_map, &s);
|
|
|
|
if (!tn)
|
|
{
|
|
if (allow_zero_length_array_sections)
|
|
/* When allowing attachment to zero-length array sections, we
|
|
copy the host pointer when the target region is not mapped. */
|
|
data = target;
|
|
else
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("pointer target not mapped for attach");
|
|
}
|
|
}
|
|
else
|
|
data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
|
|
|
|
gomp_debug (1,
|
|
"%s: attaching host %p, target %p (struct base %p) to %p\n",
|
|
__FUNCTION__, (void *) attach_to, (void *) devptr,
|
|
(void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
|
|
|
|
gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
|
|
sizeof (void *), true, cbufp);
|
|
}
|
|
else
|
|
gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
|
|
(void *) attach_to, (int) n->aux->attach_count[idx]);
|
|
}
|
|
|
|
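/* Undo a pointer attachment made by gomp_attach_pointer: decrement the
   attachment count for the pointer at DETACH_FROM within N (or force it to
   zero when FINALIZE) and, once it reaches zero, restore the device pointer
   to the current host pointer value.  */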
attribute_hidden void
|
|
gomp_detach_pointer (struct gomp_device_descr *devicep,
|
|
struct goacc_asyncqueue *aq, splay_tree_key n,
|
|
uintptr_t detach_from, bool finalize,
|
|
struct gomp_coalesce_buf *cbufp)
|
|
{
|
|
size_t idx;
|
|
|
|
if (n == NULL)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("enclosing struct not mapped for detach");
|
|
}
|
|
|
|
idx = (detach_from - n->host_start) / sizeof (void *);
|
|
|
|
if (!n->aux || !n->aux->attach_count)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("no attachment counters for struct");
|
|
}
|
|
|
|
if (finalize)
|
|
n->aux->attach_count[idx] = 1;
|
|
|
|
if (n->aux->attach_count[idx] == 0)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("attach count underflow");
|
|
}
|
|
else
|
|
n->aux->attach_count[idx]--;
|
|
|
|
if (n->aux->attach_count[idx] == 0)
|
|
{
|
|
uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
|
|
- n->host_start;
|
|
uintptr_t target = (uintptr_t) *(void **) detach_from;
|
|
|
|
gomp_debug (1,
|
|
"%s: detaching host %p, target %p (struct base %p) to %p\n",
|
|
__FUNCTION__, (void *) detach_from, (void *) devptr,
|
|
(void *) (n->tgt->tgt_start + n->tgt_offset),
|
|
(void *) target);
|
|
|
|
gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
|
|
sizeof (void *), true, cbufp);
|
|
}
|
|
else
|
|
gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
|
|
(void *) detach_from, (int) n->aux->attach_count[idx]);
|
|
}
|
|
|
|
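/* Compute the device address to pass for map entry I.  Entries that were
   mapped carry a splay tree key; the others carry a sentinel in the offset
   field: OFFSET_INLINED means the host value is passed through unchanged,
   OFFSET_POINTER yields 0 (the pointer is filled in separately), and
   OFFSET_STRUCT derives the address from the following entry of the struct
   mapping.  Anything else is a firstprivate copy placed at that offset
   inside the freshly allocated target block.  */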
attribute_hidden uintptr_t
|
|
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
|
|
{
|
|
if (tgt->list[i].key != NULL)
|
|
return tgt->list[i].key->tgt->tgt_start
|
|
+ tgt->list[i].key->tgt_offset
|
|
+ tgt->list[i].offset;
|
|
|
|
switch (tgt->list[i].offset)
|
|
{
|
|
case OFFSET_INLINED:
|
|
return (uintptr_t) hostaddrs[i];
|
|
|
|
case OFFSET_POINTER:
|
|
return 0;
|
|
|
|
case OFFSET_STRUCT:
|
|
return tgt->list[i + 1].key->tgt->tgt_start
|
|
+ tgt->list[i + 1].key->tgt_offset
|
|
+ tgt->list[i + 1].offset
|
|
+ (uintptr_t) hostaddrs[i]
|
|
- (uintptr_t) hostaddrs[i + 1];
|
|
|
|
default:
|
|
return tgt->tgt_start + tgt->list[i].offset;
|
|
}
|
|
}
|
|
|
|
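/* Map MAPNUM host address ranges onto DEVICEP for one construct.  This works
   in two passes over the mapping list: the first pass resolves entries that
   are already mapped and, for the rest, accumulates the size and alignment of
   the device block that has to be allocated; the second pass (run when
   anything was missing or needs firstprivate/pointer handling) allocates that
   block, inserts new splay tree keys and performs the host-to-device copies,
   optionally batched through a coalescing buffer.  Returns the new
   target_mem_desc; returns NULL if the device has meanwhile been finalized,
   or if an 'enter data' mapping found everything already mapped.  */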
static inline __attribute__((always_inline)) struct target_mem_desc *
|
|
gomp_map_vars_internal (struct gomp_device_descr *devicep,
|
|
struct goacc_asyncqueue *aq, size_t mapnum,
|
|
void **hostaddrs, void **devaddrs, size_t *sizes,
|
|
void *kinds, bool short_mapkind,
|
|
htab_t *refcount_set,
|
|
enum gomp_map_vars_kind pragma_kind)
|
|
{
|
|
size_t i, tgt_align, tgt_size, not_found_cnt = 0;
|
|
bool has_firstprivate = false;
|
|
bool has_always_ptrset = false;
|
|
bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
|
|
const int rshift = short_mapkind ? 8 : 3;
|
|
const int typemask = short_mapkind ? 0xff : 0x7;
|
|
struct splay_tree_s *mem_map = &devicep->mem_map;
|
|
struct splay_tree_key_s cur_node;
|
|
struct target_mem_desc *tgt
|
|
= gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
|
|
tgt->list_count = mapnum;
|
|
tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
|
|
tgt->device_descr = devicep;
|
|
tgt->prev = NULL;
|
|
struct gomp_coalesce_buf cbuf, *cbufp = NULL;
|
|
|
|
if (mapnum == 0)
|
|
{
|
|
tgt->tgt_start = 0;
|
|
tgt->tgt_end = 0;
|
|
return tgt;
|
|
}
|
|
|
|
tgt_align = sizeof (void *);
|
|
tgt_size = 0;
|
|
cbuf.chunks = NULL;
|
|
cbuf.chunk_cnt = -1;
|
|
cbuf.use_cnt = 0;
|
|
cbuf.buf = NULL;
|
|
if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
|
|
{
|
|
size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
|
|
cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
|
|
cbuf.chunk_cnt = 0;
|
|
}
|
|
if (pragma_kind & GOMP_MAP_VARS_TARGET)
|
|
{
|
|
size_t align = 4 * sizeof (void *);
|
|
tgt_align = align;
|
|
tgt_size = mapnum * sizeof (void *);
|
|
cbuf.chunk_cnt = 1;
|
|
cbuf.use_cnt = 1 + (mapnum > 1);
|
|
cbuf.chunks[0].start = 0;
|
|
cbuf.chunks[0].end = tgt_size;
|
|
}
|
|
|
|
gomp_mutex_lock (&devicep->lock);
|
|
if (devicep->state == GOMP_DEVICE_FINALIZED)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
free (tgt);
|
|
return NULL;
|
|
}
|
|
|
|
for (i = 0; i < mapnum; i++)
|
|
{
|
|
int kind = get_kind (short_mapkind, kinds, i);
|
|
bool implicit = get_implicit (short_mapkind, kinds, i);
|
|
if (hostaddrs[i] == NULL
|
|
|| (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
|
|
{
|
|
tgt->list[i].key = NULL;
|
|
tgt->list[i].offset = OFFSET_INLINED;
|
|
continue;
|
|
}
|
|
else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
|
|
|| (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
|
|
{
|
|
tgt->list[i].key = NULL;
|
|
if (!not_found_cnt)
|
|
{
|
|
/* In OpenMP < 5.0 and OpenACC the mapping has to be done
|
|
on a separate construct prior to using use_device_{addr,ptr}.
|
|
In OpenMP 5.0, map directives need to be ordered by the
|
|
middle-end before the use_device_* clauses. If
|
|
!not_found_cnt, all mappings requested (if any) are already
|
|
mapped, so use_device_{addr,ptr} can be resolved right away.
|
|
Otherwise, if not_found_cnt, gomp_map_lookup might fail
|
|
now but would succeed after performing the mappings in the
|
|
following loop. We can't defer this always to the second
|
|
loop, because it is not even invoked when !not_found_cnt
|
|
after the first loop. */
|
|
cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
cur_node.host_end = cur_node.host_start;
|
|
splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
|
|
if (n != NULL)
|
|
{
|
|
cur_node.host_start -= n->host_start;
|
|
hostaddrs[i]
|
|
= (void *) (n->tgt->tgt_start + n->tgt_offset
|
|
+ cur_node.host_start);
|
|
}
|
|
else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("use_device_ptr pointer wasn't mapped");
|
|
}
|
|
else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
|
|
/* If not present, continue using the host address. */
|
|
;
|
|
else
|
|
__builtin_unreachable ();
|
|
tgt->list[i].offset = OFFSET_INLINED;
|
|
}
|
|
else
|
|
tgt->list[i].offset = 0;
|
|
continue;
|
|
}
|
|
else if ((kind & typemask) == GOMP_MAP_STRUCT
|
|
|| (kind & typemask) == GOMP_MAP_STRUCT_UNORD)
|
|
{
|
|
size_t first = i + 1;
|
|
size_t last = i + sizes[i];
|
|
cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
cur_node.host_end = (uintptr_t) hostaddrs[last]
|
|
+ sizes[last];
|
|
tgt->list[i].key = NULL;
|
|
tgt->list[i].offset = OFFSET_STRUCT;
|
|
splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
|
|
if (n == NULL)
|
|
{
|
|
size_t align = (size_t) 1 << (kind >> rshift);
|
|
if (tgt_align < align)
|
|
tgt_align = align;
|
|
tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
|
|
tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
|
tgt_size += cur_node.host_end - cur_node.host_start;
|
|
not_found_cnt += last - i;
|
|
for (i = first; i <= last; i++)
|
|
{
|
|
tgt->list[i].key = NULL;
|
|
if (!aq
|
|
&& gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
|
|
& typemask)
|
|
&& sizes[i] != 0)
|
|
gomp_coalesce_buf_add (&cbuf,
|
|
tgt_size - cur_node.host_end
|
|
+ (uintptr_t) hostaddrs[i],
|
|
sizes[i]);
|
|
}
|
|
i--;
|
|
continue;
|
|
}
|
|
for (i = first; i <= last; i++)
|
|
gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
|
|
sizes, kinds, NULL, refcount_set);
|
|
i--;
|
|
continue;
|
|
}
|
|
else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
|
|
{
|
|
tgt->list[i].key = NULL;
|
|
tgt->list[i].offset = OFFSET_POINTER;
|
|
has_firstprivate = true;
|
|
continue;
|
|
}
|
|
else if ((kind & typemask) == GOMP_MAP_ATTACH
|
|
|| ((kind & typemask)
|
|
== GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
|
|
{
|
|
tgt->list[i].key = NULL;
|
|
has_firstprivate = true;
|
|
continue;
|
|
}
|
|
cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
if (!GOMP_MAP_POINTER_P (kind & typemask))
|
|
cur_node.host_end = cur_node.host_start + sizes[i];
|
|
else
|
|
cur_node.host_end = cur_node.host_start + sizeof (void *);
|
|
if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
|
|
{
|
|
tgt->list[i].key = NULL;
|
|
|
|
size_t align = (size_t) 1 << (kind >> rshift);
|
|
if (tgt_align < align)
|
|
tgt_align = align;
|
|
tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
|
if (!aq)
|
|
gomp_coalesce_buf_add (&cbuf, tgt_size,
|
|
cur_node.host_end - cur_node.host_start);
|
|
tgt_size += cur_node.host_end - cur_node.host_start;
|
|
has_firstprivate = true;
|
|
continue;
|
|
}
|
|
splay_tree_key n;
|
|
if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
|
|
{
|
|
n = gomp_map_0len_lookup (mem_map, &cur_node);
|
|
if (!n)
|
|
{
|
|
tgt->list[i].key = NULL;
|
|
tgt->list[i].offset = OFFSET_INLINED;
|
|
continue;
|
|
}
|
|
}
|
|
else
|
|
n = splay_tree_lookup (mem_map, &cur_node);
|
|
if (n && n->refcount != REFCOUNT_LINK)
|
|
{
|
|
int always_to_cnt = 0;
|
|
if ((kind & typemask) == GOMP_MAP_TO_PSET)
|
|
{
|
|
bool has_nullptr = false;
|
|
size_t j;
|
|
for (j = 0; j < n->tgt->list_count; j++)
|
|
if (n->tgt->list[j].key == n)
|
|
{
|
|
has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
|
|
break;
|
|
}
|
|
if (n->tgt->list_count == 0)
|
|
{
|
|
/* 'declare target'; assume has_nullptr; it could also be a
   statically assigned pointer, but then it should point to
   the equivalent variable on the host.  */
|
|
assert (n->refcount == REFCOUNT_INFINITY);
|
|
has_nullptr = true;
|
|
}
|
|
else
|
|
assert (j < n->tgt->list_count);
|
|
/* Re-map the data if there is an 'always' modifier or if a null
   pointer was there before and a non-null pointer has now been
   found; that permits transparent re-mapping of Fortran array
   descriptors which were previously mapped unallocated.  */
|
|
for (j = i + 1; j < mapnum; j++)
|
|
{
|
|
int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
|
|
if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
|
|
&& (!has_nullptr
|
|
|| !GOMP_MAP_POINTER_P (ptr_kind)
|
|
|| *(void **) hostaddrs[j] == NULL))
|
|
break;
|
|
else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
|
|
|| ((uintptr_t) hostaddrs[j] + sizeof (void *)
|
|
> cur_node.host_end))
|
|
break;
|
|
else
|
|
{
|
|
has_always_ptrset = true;
|
|
++always_to_cnt;
|
|
}
|
|
}
|
|
}
|
|
gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
|
|
kind & typemask, always_to_cnt > 0, implicit,
|
|
NULL, refcount_set);
|
|
i += always_to_cnt;
|
|
}
|
|
else
|
|
{
|
|
tgt->list[i].key = NULL;
|
|
|
|
if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
|
|
{
|
|
/* Not present, hence skip the entry - including its MAP_POINTER,
   if one exists.  */
|
|
tgt->list[i].offset = OFFSET_INLINED;
|
|
if (i + 1 < mapnum
|
|
&& ((typemask & get_kind (short_mapkind, kinds, i + 1))
|
|
== GOMP_MAP_POINTER))
|
|
{
|
|
++i;
|
|
tgt->list[i].key = NULL;
|
|
tgt->list[i].offset = 0;
|
|
}
|
|
continue;
|
|
}
|
|
size_t align = (size_t) 1 << (kind >> rshift);
|
|
not_found_cnt++;
|
|
if (tgt_align < align)
|
|
tgt_align = align;
|
|
tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
|
if (!aq
|
|
&& gomp_to_device_kind_p (kind & typemask))
|
|
gomp_coalesce_buf_add (&cbuf, tgt_size,
|
|
cur_node.host_end - cur_node.host_start);
|
|
tgt_size += cur_node.host_end - cur_node.host_start;
|
|
if ((kind & typemask) == GOMP_MAP_TO_PSET)
|
|
{
|
|
size_t j;
|
|
int kind;
|
|
for (j = i + 1; j < mapnum; j++)
|
|
if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
|
|
kinds, j)) & typemask))
|
|
&& !GOMP_MAP_ALWAYS_POINTER_P (kind))
|
|
break;
|
|
else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
|
|
|| ((uintptr_t) hostaddrs[j] + sizeof (void *)
|
|
> cur_node.host_end))
|
|
break;
|
|
else
|
|
{
|
|
tgt->list[j].key = NULL;
|
|
i++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (devaddrs)
|
|
{
|
|
if (mapnum != 1)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("unexpected aggregation");
|
|
}
|
|
tgt->to_free = devaddrs[0];
|
|
tgt->tgt_start = (uintptr_t) tgt->to_free;
|
|
tgt->tgt_end = tgt->tgt_start + sizes[0];
|
|
}
|
|
else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
|
|
{
|
|
/* Allocate a block of tgt_size bytes, aligned to tgt_align.  */
/* FIXME: Perhaps change the interface to allocate properly aligned
   memory.  */
|
|
tgt->to_free = devicep->alloc_func (devicep->target_id,
|
|
tgt_size + tgt_align - 1);
|
|
if (!tgt->to_free)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("device memory allocation fail");
|
|
}
|
|
|
|
tgt->tgt_start = (uintptr_t) tgt->to_free;
|
|
tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
|
|
tgt->tgt_end = tgt->tgt_start + tgt_size;
|
|
|
|
if (cbuf.use_cnt == 1)
|
|
cbuf.chunk_cnt--;
|
|
if (cbuf.chunk_cnt > 0)
|
|
{
|
|
cbuf.buf
|
|
= malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
|
|
if (cbuf.buf)
|
|
{
|
|
cbuf.tgt = tgt;
|
|
cbufp = &cbuf;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
tgt->to_free = NULL;
|
|
tgt->tgt_start = 0;
|
|
tgt->tgt_end = 0;
|
|
}
|
|
|
|
tgt_size = 0;
|
|
if (pragma_kind & GOMP_MAP_VARS_TARGET)
|
|
tgt_size = mapnum * sizeof (void *);
|
|
|
|
tgt->array = NULL;
|
|
if (not_found_cnt || has_firstprivate || has_always_ptrset)
|
|
{
|
|
if (not_found_cnt)
|
|
tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
|
|
splay_tree_node array = tgt->array;
|
|
size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
|
|
uintptr_t field_tgt_base = 0;
|
|
splay_tree_key field_tgt_structelem_first = NULL;
|
|
|
|
for (i = 0; i < mapnum; i++)
|
|
if (has_always_ptrset
|
|
&& tgt->list[i].key
|
|
&& (get_kind (short_mapkind, kinds, i) & typemask)
|
|
== GOMP_MAP_TO_PSET)
|
|
{
|
|
splay_tree_key k = tgt->list[i].key;
|
|
bool has_nullptr = false;
|
|
size_t j;
|
|
for (j = 0; j < k->tgt->list_count; j++)
|
|
if (k->tgt->list[j].key == k)
|
|
{
|
|
has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
|
|
break;
|
|
}
|
|
if (k->tgt->list_count == 0)
|
|
has_nullptr = true;
|
|
else
|
|
assert (j < k->tgt->list_count);
|
|
|
|
tgt->list[i].has_null_ptr_assoc = false;
|
|
for (j = i + 1; j < mapnum; j++)
|
|
{
|
|
int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
|
|
if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
|
|
&& (!has_nullptr
|
|
|| !GOMP_MAP_POINTER_P (ptr_kind)
|
|
|| *(void **) hostaddrs[j] == NULL))
|
|
break;
|
|
else if ((uintptr_t) hostaddrs[j] < k->host_start
|
|
|| ((uintptr_t) hostaddrs[j] + sizeof (void *)
|
|
> k->host_end))
|
|
break;
|
|
else
|
|
{
|
|
if (*(void **) hostaddrs[j] == NULL)
|
|
tgt->list[i].has_null_ptr_assoc = true;
|
|
tgt->list[j].key = k;
|
|
tgt->list[j].copy_from = false;
|
|
tgt->list[j].always_copy_from = false;
|
|
tgt->list[j].is_attach = false;
|
|
gomp_increment_refcount (k, refcount_set);
|
|
gomp_map_pointer (k->tgt, aq,
|
|
(uintptr_t) *(void **) hostaddrs[j],
|
|
k->tgt_offset + ((uintptr_t) hostaddrs[j]
|
|
- k->host_start),
|
|
sizes[j], cbufp, false);
|
|
}
|
|
}
|
|
i = j - 1;
|
|
}
|
|
else if (tgt->list[i].key == NULL)
|
|
{
|
|
int kind = get_kind (short_mapkind, kinds, i);
|
|
bool implicit = get_implicit (short_mapkind, kinds, i);
|
|
if (hostaddrs[i] == NULL)
|
|
continue;
|
|
switch (kind & typemask)
|
|
{
|
|
size_t align, len, first, last;
|
|
splay_tree_key n;
|
|
case GOMP_MAP_FIRSTPRIVATE:
|
|
align = (size_t) 1 << (kind >> rshift);
|
|
tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
|
tgt->list[i].offset = tgt_size;
|
|
len = sizes[i];
|
|
gomp_copy_host2dev (devicep, aq,
|
|
(void *) (tgt->tgt_start + tgt_size),
|
|
(void *) hostaddrs[i], len, false, cbufp);
|
|
/* Save the device address in hostaddrs to permit later availability
   when doing a deep firstprivate with pointer attach.  */
|
|
hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
|
|
tgt_size += len;
|
|
|
|
/* If followed by GOMP_MAP_ATTACH, pointer assign this
|
|
firstprivate to hostaddrs[i+1], which is assumed to contain a
|
|
device address. */
|
|
if (i + 1 < mapnum
|
|
&& (GOMP_MAP_ATTACH
|
|
== (typemask & get_kind (short_mapkind, kinds, i+1))))
|
|
{
|
|
uintptr_t target = (uintptr_t) hostaddrs[i];
|
|
void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
|
|
/* Per
|
|
<https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
|
|
"OpenMP: Handle descriptors in target's firstprivate [PR104949]"
|
|
this probably needs revision for 'aq' usage. */
|
|
assert (!aq);
|
|
gomp_copy_host2dev (devicep, aq, devptr, &target,
|
|
sizeof (void *), false, cbufp);
|
|
++i;
|
|
}
|
|
continue;
|
|
case GOMP_MAP_FIRSTPRIVATE_INT:
|
|
case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
|
|
continue;
|
|
case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
|
|
/* The OpenACC 'host_data' construct only allows 'use_device'
|
|
"mapping" clauses, so in the first loop, 'not_found_cnt'
|
|
must always have been zero, so all OpenACC 'use_device'
|
|
clauses have already been handled. (We can only easily test
|
|
'use_device' with 'if_present' clause here.) */
|
|
assert (tgt->list[i].offset == OFFSET_INLINED);
|
|
/* Nevertheless, FALLTHRU to the normal handling, to keep the
|
|
code conceptually simple, similar to the first loop. */
|
|
case GOMP_MAP_USE_DEVICE_PTR:
|
|
if (tgt->list[i].offset == 0)
|
|
{
|
|
cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
cur_node.host_end = cur_node.host_start;
|
|
n = gomp_map_lookup (mem_map, &cur_node);
|
|
if (n != NULL)
|
|
{
|
|
cur_node.host_start -= n->host_start;
|
|
hostaddrs[i]
|
|
= (void *) (n->tgt->tgt_start + n->tgt_offset
|
|
+ cur_node.host_start);
|
|
}
|
|
else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("use_device_ptr pointer wasn't mapped");
|
|
}
|
|
else if ((kind & typemask)
|
|
== GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
|
|
/* If not present, continue using the host address. */
|
|
;
|
|
else
|
|
__builtin_unreachable ();
|
|
tgt->list[i].offset = OFFSET_INLINED;
|
|
}
|
|
continue;
|
|
case GOMP_MAP_STRUCT_UNORD:
|
|
if (sizes[i] > 1)
|
|
{
|
|
void *first = hostaddrs[i + 1];
|
|
for (size_t j = i + 1; j < i + sizes[i]; j++)
|
|
if (hostaddrs[j + 1] != first)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("Mapped array elements must be the "
|
|
"same (%p vs %p)", first,
|
|
hostaddrs[j + 1]);
|
|
}
|
|
}
|
|
/* Fallthrough. */
|
|
case GOMP_MAP_STRUCT:
|
|
first = i + 1;
|
|
last = i + sizes[i];
|
|
cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
cur_node.host_end = (uintptr_t) hostaddrs[last]
|
|
+ sizes[last];
|
|
if (tgt->list[first].key != NULL)
|
|
continue;
|
|
if (sizes[last] == 0)
|
|
cur_node.host_end++;
|
|
n = splay_tree_lookup (mem_map, &cur_node);
|
|
if (sizes[last] == 0)
|
|
cur_node.host_end--;
|
|
if (n == NULL && cur_node.host_start == cur_node.host_end)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("Struct pointer member not mapped (%p)",
|
|
(void*) hostaddrs[first]);
|
|
}
|
|
if (n == NULL)
|
|
{
|
|
size_t align = (size_t) 1 << (kind >> rshift);
|
|
tgt_size -= (uintptr_t) hostaddrs[first]
|
|
- (uintptr_t) hostaddrs[i];
|
|
tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
|
tgt_size += (uintptr_t) hostaddrs[first]
|
|
- (uintptr_t) hostaddrs[i];
|
|
field_tgt_base = (uintptr_t) hostaddrs[first];
|
|
field_tgt_offset = tgt_size;
|
|
field_tgt_clear = last;
|
|
field_tgt_structelem_first = NULL;
|
|
tgt_size += cur_node.host_end
|
|
- (uintptr_t) hostaddrs[first];
|
|
continue;
|
|
}
|
|
for (i = first; i <= last; i++)
|
|
gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
|
|
sizes, kinds, cbufp, refcount_set);
|
|
i--;
|
|
continue;
|
|
case GOMP_MAP_ALWAYS_POINTER:
|
|
cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
cur_node.host_end = cur_node.host_start + sizeof (void *);
|
|
n = splay_tree_lookup (mem_map, &cur_node);
|
|
if (n == NULL
|
|
|| n->host_start > cur_node.host_start
|
|
|| n->host_end < cur_node.host_end)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("always pointer not mapped");
|
|
}
|
|
if (i > 0
|
|
&& ((get_kind (short_mapkind, kinds, i - 1) & typemask)
|
|
!= GOMP_MAP_ALWAYS_POINTER))
|
|
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
|
|
if (cur_node.tgt_offset)
|
|
cur_node.tgt_offset -= sizes[i];
|
|
gomp_copy_host2dev (devicep, aq,
|
|
(void *) (n->tgt->tgt_start
|
|
+ n->tgt_offset
|
|
+ cur_node.host_start
|
|
- n->host_start),
|
|
(void *) &cur_node.tgt_offset,
|
|
sizeof (void *), true, cbufp);
|
|
cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
|
|
+ cur_node.host_start - n->host_start;
|
|
continue;
|
|
case GOMP_MAP_IF_PRESENT:
|
|
/* Not present - otherwise handled above. Skip over its
|
|
MAP_POINTER as well. */
|
|
if (i + 1 < mapnum
|
|
&& ((typemask & get_kind (short_mapkind, kinds, i + 1))
|
|
== GOMP_MAP_POINTER))
|
|
++i;
|
|
continue;
|
|
case GOMP_MAP_ATTACH:
|
|
case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
|
|
{
|
|
cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
cur_node.host_end = cur_node.host_start + sizeof (void *);
|
|
splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
|
|
if (n != NULL)
|
|
{
|
|
tgt->list[i].key = n;
|
|
tgt->list[i].offset = cur_node.host_start - n->host_start;
|
|
tgt->list[i].length = n->host_end - n->host_start;
|
|
tgt->list[i].copy_from = false;
|
|
tgt->list[i].always_copy_from = false;
|
|
tgt->list[i].is_attach = true;
|
|
/* OpenACC 'attach'/'detach' doesn't affect
|
|
structured/dynamic reference counts ('n->refcount',
|
|
'n->dynamic_refcount'). */
|
|
|
|
bool zlas
|
|
= ((kind & typemask)
|
|
== GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
|
|
gomp_attach_pointer (devicep, aq, mem_map, n,
|
|
(uintptr_t) hostaddrs[i], sizes[i],
|
|
cbufp, zlas);
|
|
}
|
|
else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("outer struct not mapped for attach");
|
|
}
|
|
continue;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
splay_tree_key k = &array->key;
|
|
k->host_start = (uintptr_t) hostaddrs[i];
|
|
if (!GOMP_MAP_POINTER_P (kind & typemask))
|
|
k->host_end = k->host_start + sizes[i];
|
|
else
|
|
k->host_end = k->host_start + sizeof (void *);
|
|
splay_tree_key n = splay_tree_lookup (mem_map, k);
|
|
if (n && n->refcount != REFCOUNT_LINK)
|
|
{
|
|
if (field_tgt_clear != FIELD_TGT_EMPTY)
|
|
{
|
|
/* For this condition to be true, there must be a
|
|
duplicate struct element mapping. This can happen with
|
|
GOMP_MAP_STRUCT_UNORD mappings, for example. */
|
|
tgt->list[i].key = n;
|
|
if (openmp_p)
|
|
{
|
|
assert ((n->refcount & REFCOUNT_STRUCTELEM) != 0);
|
|
assert (field_tgt_structelem_first != NULL);
|
|
|
|
if (i == field_tgt_clear)
|
|
{
|
|
n->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
|
|
field_tgt_structelem_first = NULL;
|
|
}
|
|
}
|
|
if (i == field_tgt_clear)
|
|
field_tgt_clear = FIELD_TGT_EMPTY;
|
|
gomp_increment_refcount (n, refcount_set);
|
|
tgt->list[i].copy_from
|
|
= GOMP_MAP_COPY_FROM_P (kind & typemask);
|
|
tgt->list[i].always_copy_from
|
|
= GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
|
|
tgt->list[i].is_attach = false;
|
|
tgt->list[i].offset = 0;
|
|
tgt->list[i].length = k->host_end - k->host_start;
|
|
}
|
|
else
|
|
gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
|
|
kind & typemask, false, implicit,
|
|
cbufp, refcount_set);
|
|
}
|
|
else
|
|
{
|
|
k->aux = NULL;
|
|
if (n && n->refcount == REFCOUNT_LINK)
|
|
{
|
|
/* Replace target address of the pointer with target address
|
|
of mapped object in the splay tree. */
|
|
splay_tree_remove (mem_map, n);
|
|
k->aux
|
|
= gomp_malloc_cleared (sizeof (struct splay_tree_aux));
|
|
k->aux->link_key = n;
|
|
}
|
|
size_t align = (size_t) 1 << (kind >> rshift);
|
|
tgt->list[i].key = k;
|
|
k->tgt = tgt;
|
|
k->refcount = 0;
|
|
k->dynamic_refcount = 0;
|
|
if (field_tgt_clear != FIELD_TGT_EMPTY)
|
|
{
|
|
k->tgt_offset = k->host_start - field_tgt_base
|
|
+ field_tgt_offset;
|
|
if (openmp_p)
|
|
{
|
|
k->refcount = REFCOUNT_STRUCTELEM;
|
|
if (field_tgt_structelem_first == NULL)
|
|
{
|
|
/* Set to first structure element of sequence. */
|
|
k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
|
|
field_tgt_structelem_first = k;
|
|
}
|
|
else
|
|
/* Point to refcount of leading element, but do not
|
|
increment again. */
|
|
k->structelem_refcount_ptr
|
|
= &field_tgt_structelem_first->structelem_refcount;
|
|
|
|
if (i == field_tgt_clear)
|
|
{
|
|
k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
|
|
field_tgt_structelem_first = NULL;
|
|
}
|
|
}
|
|
if (i == field_tgt_clear)
|
|
field_tgt_clear = FIELD_TGT_EMPTY;
|
|
}
|
|
else
|
|
{
|
|
tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
|
k->tgt_offset = tgt_size;
|
|
tgt_size += k->host_end - k->host_start;
|
|
}
|
|
/* First increment, from 0 to 1. gomp_increment_refcount
|
|
encapsulates the different increment cases, so use this
|
|
instead of directly setting 1 during initialization. */
|
|
gomp_increment_refcount (k, refcount_set);
|
|
|
|
tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
|
|
tgt->list[i].always_copy_from
|
|
= GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
|
|
tgt->list[i].is_attach = false;
|
|
tgt->list[i].offset = 0;
|
|
tgt->list[i].length = k->host_end - k->host_start;
|
|
tgt->refcount++;
|
|
array->left = NULL;
|
|
array->right = NULL;
|
|
splay_tree_insert (mem_map, array);
|
|
switch (kind & typemask)
|
|
{
|
|
case GOMP_MAP_ALLOC:
|
|
case GOMP_MAP_FROM:
|
|
case GOMP_MAP_FORCE_ALLOC:
|
|
case GOMP_MAP_FORCE_FROM:
|
|
case GOMP_MAP_ALWAYS_FROM:
|
|
break;
|
|
case GOMP_MAP_TO:
|
|
case GOMP_MAP_TOFROM:
|
|
case GOMP_MAP_FORCE_TO:
|
|
case GOMP_MAP_FORCE_TOFROM:
|
|
case GOMP_MAP_ALWAYS_TO:
|
|
case GOMP_MAP_ALWAYS_TOFROM:
|
|
gomp_copy_host2dev (devicep, aq,
|
|
(void *) (tgt->tgt_start
|
|
+ k->tgt_offset),
|
|
(void *) k->host_start,
|
|
k->host_end - k->host_start,
|
|
false, cbufp);
|
|
break;
|
|
case GOMP_MAP_POINTER:
|
|
case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
|
|
gomp_map_pointer
|
|
(tgt, aq, (uintptr_t) *(void **) k->host_start,
|
|
k->tgt_offset, sizes[i], cbufp,
|
|
((kind & typemask)
|
|
== GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
|
|
break;
|
|
case GOMP_MAP_TO_PSET:
|
|
gomp_copy_host2dev (devicep, aq,
|
|
(void *) (tgt->tgt_start
|
|
+ k->tgt_offset),
|
|
(void *) k->host_start,
|
|
k->host_end - k->host_start,
|
|
false, cbufp);
|
|
tgt->list[i].has_null_ptr_assoc = false;
|
|
|
|
for (j = i + 1; j < mapnum; j++)
|
|
{
|
|
int ptr_kind = (get_kind (short_mapkind, kinds, j)
|
|
& typemask);
|
|
if (!GOMP_MAP_POINTER_P (ptr_kind)
|
|
&& !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
|
|
break;
|
|
else if ((uintptr_t) hostaddrs[j] < k->host_start
|
|
|| ((uintptr_t) hostaddrs[j] + sizeof (void *)
|
|
> k->host_end))
|
|
break;
|
|
else
|
|
{
|
|
tgt->list[j].key = k;
|
|
tgt->list[j].copy_from = false;
|
|
tgt->list[j].always_copy_from = false;
|
|
tgt->list[j].is_attach = false;
|
|
tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
|
|
/* For OpenMP, the use of refcount_sets causes
   errors if we set k->refcount = 1 above but also
   increment it again here, because decrementing would
   then not match properly, since we decrement only once
   for each key's refcount.  Therefore avoid this
   increment for OpenMP constructs.  */
|
|
if (!openmp_p)
|
|
gomp_increment_refcount (k, refcount_set);
|
|
gomp_map_pointer (tgt, aq,
|
|
(uintptr_t) *(void **) hostaddrs[j],
|
|
k->tgt_offset
|
|
+ ((uintptr_t) hostaddrs[j]
|
|
- k->host_start),
|
|
sizes[j], cbufp, false);
|
|
}
|
|
}
|
|
i = j - 1;
|
|
break;
|
|
case GOMP_MAP_FORCE_PRESENT:
|
|
case GOMP_MAP_ALWAYS_PRESENT_TO:
|
|
case GOMP_MAP_ALWAYS_PRESENT_FROM:
|
|
case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
|
|
{
|
|
/* We already looked up the memory region above and it
|
|
was missing. */
|
|
size_t size = k->host_end - k->host_start;
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
#ifdef HAVE_INTTYPES_H
|
|
gomp_fatal ("present clause: not present on the device "
|
|
"(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
|
|
"dev: %d)", (void *) k->host_start,
|
|
(uint64_t) size, (uint64_t) size,
|
|
devicep->target_id);
|
|
#else
|
|
gomp_fatal ("present clause: not present on the device "
|
|
"(addr: %p, size: %lu (0x%lx), dev: %d)",
|
|
(void *) k->host_start,
|
|
(unsigned long) size, (unsigned long) size,
|
|
devicep->target_id);
|
|
#endif
|
|
}
|
|
break;
|
|
case GOMP_MAP_FORCE_DEVICEPTR:
|
|
assert (k->host_end - k->host_start == sizeof (void *));
|
|
gomp_copy_host2dev (devicep, aq,
|
|
(void *) (tgt->tgt_start
|
|
+ k->tgt_offset),
|
|
(void *) k->host_start,
|
|
sizeof (void *), false, cbufp);
|
|
break;
|
|
default:
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
|
|
kind);
|
|
}
|
|
|
|
if (k->aux && k->aux->link_key)
|
|
{
|
|
/* Set link pointer on target to the device address of the
|
|
mapped object. Also deal with offsets due to
|
|
array-section mapping. */
|
|
void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset
|
|
- (k->host_start
|
|
- k->aux->link_key->host_start));
|
|
/* We intentionally do not use coalescing here, as it's not
|
|
data allocated by the current call to this function. */
|
|
gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
|
|
&tgt_addr, sizeof (void *), true, NULL);
|
|
}
|
|
array++;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (pragma_kind & GOMP_MAP_VARS_TARGET)
|
|
{
|
|
for (i = 0; i < mapnum; i++)
|
|
{
|
|
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
|
|
gomp_copy_host2dev (devicep, aq,
|
|
(void *) (tgt->tgt_start + i * sizeof (void *)),
|
|
(void *) &cur_node.tgt_offset, sizeof (void *),
|
|
true, cbufp);
|
|
}
|
|
}
|
|
|
|
if (cbufp)
|
|
{
|
|
long c = 0;
|
|
for (c = 0; c < cbuf.chunk_cnt; ++c)
|
|
gomp_copy_host2dev (devicep, aq,
|
|
(void *) (tgt->tgt_start + cbuf.chunks[c].start),
|
|
(char *) cbuf.buf + (cbuf.chunks[c].start
|
|
- cbuf.chunks[0].start),
|
|
cbuf.chunks[c].end - cbuf.chunks[c].start,
|
|
false, NULL);
|
|
if (aq)
|
|
/* Free once the transfer has completed. */
|
|
devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
|
|
else
|
|
free (cbuf.buf);
|
|
cbuf.buf = NULL;
|
|
cbufp = NULL;
|
|
}
|
|
|
|
/* If the variable from "omp target enter data" map-list was already mapped,
|
|
tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
|
|
gomp_exit_data. */
|
|
if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
|
|
{
|
|
free (tgt);
|
|
tgt = NULL;
|
|
}
|
|
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
return tgt;
|
|
}
|
|
|
|
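/* Map MAPNUM host variables onto DEVICEP.  A local refcount set is created
when the caller does not supply one; the resulting target_mem_desc is
returned.  */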
static struct target_mem_desc *
|
|
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
|
void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
|
|
bool short_mapkind, htab_t *refcount_set,
|
|
enum gomp_map_vars_kind pragma_kind)
|
|
{
|
|
/* This management of a local refcount_set is for convenience of callers
|
|
who do not share a refcount_set over multiple map/unmap uses. */
|
|
htab_t local_refcount_set = NULL;
|
|
if (refcount_set == NULL)
|
|
{
|
|
local_refcount_set = htab_create (mapnum);
|
|
refcount_set = &local_refcount_set;
|
|
}
|
|
|
|
struct target_mem_desc *tgt;
|
|
tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
|
|
sizes, kinds, short_mapkind, refcount_set,
|
|
pragma_kind);
|
|
if (local_refcount_set)
|
|
htab_free (local_refcount_set);
|
|
|
|
return tgt;
|
|
}
|
|
|
|
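/* OpenACC variant of gomp_map_vars: map variables using the asynchronous
queue AQ and mark the mapping as an OpenACC one.  */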
attribute_hidden struct target_mem_desc *
|
|
goacc_map_vars (struct gomp_device_descr *devicep,
|
|
struct goacc_asyncqueue *aq, size_t mapnum,
|
|
void **hostaddrs, void **devaddrs, size_t *sizes,
|
|
void *kinds, bool short_mapkind,
|
|
enum gomp_map_vars_kind pragma_kind)
|
|
{
|
|
return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
|
|
sizes, kinds, short_mapkind, NULL,
|
|
GOMP_MAP_VARS_OPENACC | pragma_kind);
|
|
}
|
|
|
|
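/* Release the device memory of TGT and free its host-side descriptor.  */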
static void
|
|
gomp_unmap_tgt (struct target_mem_desc *tgt)
|
|
{
|
|
/* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
|
|
if (tgt->tgt_end)
|
|
gomp_free_device_memory (tgt->device_descr, tgt->to_free);
|
|
|
|
free (tgt->array);
|
|
free (tgt);
|
|
}
|
|
|
|
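/* Decrement the reference count of the target_mem_desc pointed to by PTR
and unmap it when the last reference is dropped.  Return true if it was
unmapped.  */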
static bool
|
|
gomp_unref_tgt (void *ptr)
|
|
{
|
|
bool is_tgt_unmapped = false;
|
|
|
|
struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
|
|
|
|
if (tgt->refcount > 1)
|
|
tgt->refcount--;
|
|
else
|
|
{
|
|
gomp_unmap_tgt (tgt);
|
|
is_tgt_unmapped = true;
|
|
}
|
|
|
|
return is_tgt_unmapped;
|
|
}
|
|
|
|
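/* Callback wrapper around gomp_unref_tgt for asynchronous queues,
discarding the return value.  */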
static void
|
|
gomp_unref_tgt_void (void *ptr)
|
|
{
|
|
(void) gomp_unref_tgt (ptr);
|
|
}
|
|
|
|
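/* Remove K from splay tree SP.  If K carries auxiliary data, re-insert a
saved link key and free the attach counts and the auxiliary structure.  */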
static void
|
|
gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
|
|
{
|
|
splay_tree_remove (sp, k);
|
|
if (k->aux)
|
|
{
|
|
if (k->aux->link_key)
|
|
splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
|
|
if (k->aux->attach_count)
|
|
free (k->aux->attach_count);
|
|
free (k->aux);
|
|
k->aux = NULL;
|
|
}
|
|
}
|
|
|
|
static inline __attribute__((always_inline)) bool
|
|
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
|
|
struct goacc_asyncqueue *aq)
|
|
{
|
|
bool is_tgt_unmapped = false;
|
|
|
|
if (REFCOUNT_STRUCTELEM_P (k->refcount))
|
|
{
|
|
if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
|
|
/* Infer the splay_tree_key of the first structelem key using the
pointer to the first structelem_refcount.  */
|
|
k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
|
|
- offsetof (struct splay_tree_key_s,
|
|
structelem_refcount));
|
|
assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
|
|
|
|
/* The array created by gomp_map_vars is an array of splay_tree_nodes,
|
|
with the splay_tree_keys embedded inside. */
|
|
splay_tree_node node =
|
|
(splay_tree_node) ((char *) k
|
|
- offsetof (struct splay_tree_node_s, key));
|
|
while (true)
|
|
{
|
|
/* Starting from the _FIRST key, continue for all following
sibling keys.  */
|
|
gomp_remove_splay_tree_key (&devicep->mem_map, k);
|
|
if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
|
|
break;
|
|
else
|
|
k = &(++node)->key;
|
|
}
|
|
}
|
|
else
|
|
gomp_remove_splay_tree_key (&devicep->mem_map, k);
|
|
|
|
if (aq)
|
|
devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
|
|
(void *) k->tgt);
|
|
else
|
|
is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
|
|
return is_tgt_unmapped;
|
|
}
|
|
|
|
attribute_hidden bool
|
|
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
|
|
{
|
|
return gomp_remove_var_internal (devicep, k, NULL);
|
|
}
|
|
|
|
/* Remove a variable asynchronously. This actually removes the variable
|
|
mapping immediately, but retains the linked target_mem_desc until the
|
|
asynchronous operation has completed (as it may still refer to target
|
|
memory). The device lock must be held before entry, and remains locked on
|
|
exit. */
|
|
|
|
attribute_hidden void
|
|
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
|
|
struct goacc_asyncqueue *aq)
|
|
{
|
|
(void) gomp_remove_var_internal (devicep, k, aq);
|
|
}
|
|
|
|
/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
|
|
variables back from device to host: if it is false, it is assumed that this
|
|
has been done already. */
|
|
|
|
static inline __attribute__((always_inline)) void
|
|
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
|
|
htab_t *refcount_set, struct goacc_asyncqueue *aq)
|
|
{
|
|
struct gomp_device_descr *devicep = tgt->device_descr;
|
|
|
|
if (tgt->list_count == 0)
|
|
{
|
|
free (tgt);
|
|
return;
|
|
}
|
|
|
|
gomp_mutex_lock (&devicep->lock);
|
|
if (devicep->state == GOMP_DEVICE_FINALIZED)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
free (tgt->array);
|
|
free (tgt);
|
|
return;
|
|
}
|
|
|
|
size_t i;
|
|
|
|
/* We must perform detachments before any copies back to the host. */
|
|
for (i = 0; i < tgt->list_count; i++)
|
|
{
|
|
splay_tree_key k = tgt->list[i].key;
|
|
|
|
if (k != NULL && tgt->list[i].is_attach)
|
|
gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
|
|
+ tgt->list[i].offset,
|
|
false, NULL);
|
|
}
|
|
|
|
for (i = 0; i < tgt->list_count; i++)
|
|
{
|
|
splay_tree_key k = tgt->list[i].key;
|
|
if (k == NULL)
|
|
continue;
|
|
|
|
/* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
|
|
counts ('n->refcount', 'n->dynamic_refcount'). */
|
|
if (tgt->list[i].is_attach)
|
|
continue;
|
|
|
|
bool do_copy, do_remove;
|
|
gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
|
|
|
|
if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
|
|
|| tgt->list[i].always_copy_from)
|
|
gomp_copy_dev2host (devicep, aq,
|
|
(void *) (k->host_start + tgt->list[i].offset),
|
|
(void *) (k->tgt->tgt_start + k->tgt_offset
|
|
+ tgt->list[i].offset),
|
|
tgt->list[i].length);
|
|
if (do_remove)
|
|
{
|
|
struct target_mem_desc *k_tgt = k->tgt;
|
|
bool is_tgt_unmapped = gomp_remove_var (devicep, k);
|
|
/* It would be bad if TGT got unmapped while we're still iterating
|
|
over its LIST_COUNT, and also expect to use it in the following
|
|
code. */
|
|
assert (!is_tgt_unmapped
|
|
|| k_tgt != tgt);
|
|
}
|
|
}
|
|
|
|
if (aq)
|
|
devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
|
|
(void *) tgt);
|
|
else
|
|
gomp_unref_tgt ((void *) tgt);
|
|
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
}
|
|
|
|
static void
|
|
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
|
|
htab_t *refcount_set)
|
|
{
|
|
/* This management of a local refcount_set is for convenience of callers
|
|
who do not share a refcount_set over multiple map/unmap uses. */
|
|
htab_t local_refcount_set = NULL;
|
|
if (refcount_set == NULL)
|
|
{
|
|
local_refcount_set = htab_create (tgt->list_count);
|
|
refcount_set = &local_refcount_set;
|
|
}
|
|
|
|
gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
|
|
|
|
if (local_refcount_set)
|
|
htab_free (local_refcount_set);
|
|
}
|
|
|
|
attribute_hidden void
|
|
goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
|
|
struct goacc_asyncqueue *aq)
|
|
{
|
|
gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
|
|
}
|
|
|
|
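/* Perform the data movement for 'target update': for each of the MAPNUM
entries in HOSTADDRS/SIZES/KINDS, copy data to and/or from DEVICEP
according to the mapping kind, taking care not to overwrite pointers that
are still attached.  */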
static void
|
|
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
|
|
size_t *sizes, void *kinds, bool short_mapkind)
|
|
{
|
|
size_t i;
|
|
struct splay_tree_key_s cur_node;
|
|
const int typemask = short_mapkind ? 0xff : 0x7;
|
|
|
|
if (!devicep)
|
|
return;
|
|
|
|
if (mapnum == 0)
|
|
return;
|
|
|
|
gomp_mutex_lock (&devicep->lock);
|
|
if (devicep->state == GOMP_DEVICE_FINALIZED)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
return;
|
|
}
|
|
|
|
for (i = 0; i < mapnum; i++)
|
|
if (sizes[i])
|
|
{
|
|
cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
cur_node.host_end = cur_node.host_start + sizes[i];
|
|
splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
|
|
if (n)
|
|
{
|
|
int kind = get_kind (short_mapkind, kinds, i);
|
|
if (n->host_start > cur_node.host_start
|
|
|| n->host_end < cur_node.host_end)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("Trying to update [%p..%p) object when "
|
|
"only [%p..%p) is mapped",
|
|
(void *) cur_node.host_start,
|
|
(void *) cur_node.host_end,
|
|
(void *) n->host_start,
|
|
(void *) n->host_end);
|
|
}
|
|
|
|
if (n->aux && n->aux->attach_count)
|
|
{
|
|
uintptr_t addr = cur_node.host_start;
|
|
while (addr < cur_node.host_end)
|
|
{
|
|
/* We have to be careful not to overwrite still attached
|
|
pointers during host<->device updates. */
|
|
size_t i = (addr - cur_node.host_start) / sizeof (void *);
|
|
if (n->aux->attach_count[i] == 0)
|
|
{
|
|
void *devaddr = (void *) (n->tgt->tgt_start
|
|
+ n->tgt_offset
|
|
+ addr - n->host_start);
|
|
if (GOMP_MAP_COPY_TO_P (kind & typemask))
|
|
gomp_copy_host2dev (devicep, NULL,
|
|
devaddr, (void *) addr,
|
|
sizeof (void *), false, NULL);
|
|
if (GOMP_MAP_COPY_FROM_P (kind & typemask))
|
|
gomp_copy_dev2host (devicep, NULL,
|
|
(void *) addr, devaddr,
|
|
sizeof (void *));
|
|
}
|
|
addr += sizeof (void *);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
void *hostaddr = (void *) cur_node.host_start;
|
|
void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
|
|
+ cur_node.host_start
|
|
- n->host_start);
|
|
size_t size = cur_node.host_end - cur_node.host_start;
|
|
|
|
if (GOMP_MAP_COPY_TO_P (kind & typemask))
|
|
gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
|
|
false, NULL);
|
|
if (GOMP_MAP_COPY_FROM_P (kind & typemask))
|
|
gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
int kind = get_kind (short_mapkind, kinds, i);
|
|
|
|
if (GOMP_MAP_PRESENT_P (kind))
|
|
{
|
|
/* We already looked up the memory region above and it
|
|
was missing. */
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
#ifdef HAVE_INTTYPES_H
|
|
gomp_fatal ("present clause: not present on the device "
|
|
"(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
|
|
"dev: %d)", (void *) hostaddrs[i],
|
|
(uint64_t) sizes[i], (uint64_t) sizes[i],
|
|
devicep->target_id);
|
|
#else
|
|
gomp_fatal ("present clause: not present on the device "
|
|
"(addr: %p, size: %lu (0x%lx), dev: %d)",
|
|
(void *) hostaddrs[i], (unsigned long) sizes[i],
|
|
(unsigned long) sizes[i], devicep->target_id);
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
}
|
|
|
|
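/* Return the gomp_offload_icv_list item for device DEV_NUM, or NULL if no
ICVs have been registered for that device yet.  */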
static struct gomp_offload_icv_list *
|
|
gomp_get_offload_icv_item (int dev_num)
|
|
{
|
|
struct gomp_offload_icv_list *l = gomp_offload_icv_list;
|
|
while (l != NULL && l->device_num != dev_num)
|
|
l = l->next;
|
|
|
|
return l;
|
|
}
|
|
|
|
/* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
|
|
depending on the device num and the variable hierarchy
|
|
(_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
|
|
device and thus no item with that device number is contained in
|
|
gomp_offload_icv_list, then a new item is created and added to the list. */
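/* For instance, for device 42 a value from OMP_NUM_TEAMS_DEV_42 takes
precedence over OMP_NUM_TEAMS_DEV, which in turn takes precedence over
OMP_NUM_TEAMS_ALL.  */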
|
|
|
|
static struct gomp_offload_icvs *
|
|
get_gomp_offload_icvs (int dev_num)
|
|
{
|
|
struct gomp_icv_list *dev
|
|
= gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
|
|
struct gomp_icv_list *all
|
|
= gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
|
|
struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
|
|
struct gomp_offload_icv_list *offload_icvs
|
|
= gomp_get_offload_icv_item (dev_num);
|
|
|
|
if (offload_icvs != NULL)
|
|
return &offload_icvs->icvs;
|
|
|
|
struct gomp_offload_icv_list *new
|
|
= (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
|
|
|
|
new->device_num = dev_num;
|
|
new->icvs.device_num = dev_num;
|
|
new->next = gomp_offload_icv_list;
|
|
|
|
if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
|
|
new->icvs.nteams = dev_x->icvs.nteams_var;
|
|
else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
|
|
new->icvs.nteams = dev->icvs.nteams_var;
|
|
else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
|
|
new->icvs.nteams = all->icvs.nteams_var;
|
|
else
|
|
new->icvs.nteams = gomp_default_icv_values.nteams_var;
|
|
|
|
if (dev_x != NULL
|
|
&& gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
|
|
new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
|
|
else if (dev != NULL
|
|
&& gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
|
|
new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
|
|
else if (all != NULL
|
|
&& gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
|
|
new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
|
|
else
|
|
new->icvs.teams_thread_limit
|
|
= gomp_default_icv_values.teams_thread_limit_var;
|
|
|
|
if (dev_x != NULL
|
|
&& gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
|
|
new->icvs.default_device = dev_x->icvs.default_device_var;
|
|
else if (dev != NULL
|
|
&& gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
|
|
new->icvs.default_device = dev->icvs.default_device_var;
|
|
else if (all != NULL
|
|
&& gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
|
|
new->icvs.default_device = all->icvs.default_device_var;
|
|
else
|
|
new->icvs.default_device = gomp_default_icv_values.default_device_var;
|
|
|
|
gomp_offload_icv_list = new;
|
|
return &new->icvs;
|
|
}
|
|
|
|
/* Load the image pointed to by TARGET_DATA to the device specified by
DEVICEP, and insert into the splay tree the mapping between addresses from
HOST_TABLE and from the loaded target image.  We rely on the host and
device compilers emitting variables and functions in the same order.  */
|
|
|
|
static void
|
|
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
|
|
const void *host_table, const void *target_data,
|
|
bool is_register_lock)
|
|
{
|
|
void **host_func_table = ((void ***) host_table)[0];
|
|
void **host_funcs_end = ((void ***) host_table)[1];
|
|
void **host_var_table = ((void ***) host_table)[2];
|
|
void **host_vars_end = ((void ***) host_table)[3];
|
|
void **host_ind_func_table = NULL;
|
|
void **host_ind_funcs_end = NULL;
|
|
|
|
if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version))
|
|
{
|
|
host_ind_func_table = ((void ***) host_table)[4];
|
|
host_ind_funcs_end = ((void ***) host_table)[5];
|
|
}
|
|
|
|
/* The func and ind_func tables contain only addresses, the var table
|
|
contains addresses and corresponding sizes. */
|
|
int num_funcs = host_funcs_end - host_func_table;
|
|
int num_vars = (host_vars_end - host_var_table) / 2;
|
|
int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table);
|
|
|
|
/* Load image to device and get target addresses for the image. */
|
|
struct addr_pair *target_table = NULL;
|
|
uint64_t *rev_target_fn_table = NULL;
|
|
int i, num_target_entries;
|
|
|
|
/* With reverse offload, insert also target-host addresses. */
|
|
bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
|
|
|
|
num_target_entries
|
|
= devicep->load_image_func (devicep->target_id, version,
|
|
target_data, &target_table,
|
|
rev_lookup ? &rev_target_fn_table : NULL,
|
|
num_ind_funcs
|
|
? (uint64_t *) host_ind_func_table : NULL);
|
|
|
|
/* The "+1" is due to the additional ICV struct. */
|
|
if (num_target_entries != num_funcs + num_vars + 1)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
if (is_register_lock)
|
|
gomp_mutex_unlock (®ister_lock);
|
|
gomp_fatal ("Cannot map target functions or variables"
|
|
" (expected %u + %u + 1, have %u)", num_funcs, num_vars,
|
|
num_target_entries);
|
|
}
|
|
|
|
/* Insert host-target address mapping into splay tree. */
|
|
struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
|
|
/* "+1" due to the additional ICV struct. */
|
|
tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
|
|
* sizeof (*tgt->array));
|
|
if (rev_target_fn_table)
|
|
tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
|
|
else
|
|
tgt->rev_array = NULL;
|
|
tgt->refcount = REFCOUNT_INFINITY;
|
|
tgt->tgt_start = 0;
|
|
tgt->tgt_end = 0;
|
|
tgt->to_free = NULL;
|
|
tgt->prev = NULL;
|
|
tgt->list_count = 0;
|
|
tgt->device_descr = devicep;
|
|
splay_tree_node array = tgt->array;
|
|
reverse_splay_tree_node rev_array = tgt->rev_array;
|
|
|
|
for (i = 0; i < num_funcs; i++)
|
|
{
|
|
splay_tree_key k = &array->key;
|
|
k->host_start = (uintptr_t) host_func_table[i];
|
|
k->host_end = k->host_start + 1;
|
|
k->tgt = tgt;
|
|
k->tgt_offset = target_table[i].start;
|
|
k->refcount = REFCOUNT_INFINITY;
|
|
k->dynamic_refcount = 0;
|
|
k->aux = NULL;
|
|
array->left = NULL;
|
|
array->right = NULL;
|
|
splay_tree_insert (&devicep->mem_map, array);
|
|
if (rev_target_fn_table)
|
|
{
|
|
reverse_splay_tree_key k2 = &rev_array->key;
|
|
k2->dev = rev_target_fn_table[i];
|
|
k2->k = k;
|
|
rev_array->left = NULL;
|
|
rev_array->right = NULL;
|
|
if (k2->dev != 0)
|
|
reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
|
|
rev_array++;
|
|
}
|
|
array++;
|
|
}
|
|
|
|
/* Most significant bit of the size in host and target tables marks
|
|
"omp declare target link" variables. */
|
|
const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
|
|
const uintptr_t size_mask = ~link_bit;
|
|
|
|
for (i = 0; i < num_vars; i++)
|
|
{
|
|
struct addr_pair *target_var = &target_table[num_funcs + i];
|
|
uintptr_t target_size = target_var->end - target_var->start;
|
|
bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
|
|
|
|
if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
if (is_register_lock)
|
|
gomp_mutex_unlock (®ister_lock);
|
|
gomp_fatal ("Cannot map target variables (size mismatch)");
|
|
}
|
|
|
|
splay_tree_key k = &array->key;
|
|
k->host_start = (uintptr_t) host_var_table[i * 2];
|
|
k->host_end
|
|
= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
|
|
k->tgt = tgt;
|
|
k->tgt_offset = target_var->start;
|
|
k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
|
|
k->dynamic_refcount = 0;
|
|
k->aux = NULL;
|
|
array->left = NULL;
|
|
array->right = NULL;
|
|
splay_tree_insert (&devicep->mem_map, array);
|
|
array++;
|
|
}
|
|
|
|
/* Last entry is for the ICV struct variable; if absent, start = end = 0. */
|
|
struct addr_pair *icv_var = &target_table[num_funcs + num_vars];
|
|
if (icv_var->start != 0)
|
|
{
|
|
/* The index of the devicep within devices[] is regarded as its
|
|
'device number', which is different from the per-device type
|
|
devicep->target_id. */
|
|
int dev_num = (int) (devicep - &devices[0]);
|
|
struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
|
|
size_t var_size = icv_var->end - icv_var->start;
|
|
if (var_size != sizeof (struct gomp_offload_icvs))
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
if (is_register_lock)
|
|
gomp_mutex_unlock (®ister_lock);
|
|
gomp_fatal ("offload plugin managed 'icv struct' not of expected "
|
|
"format");
|
|
}
|
|
/* Copy the ICV struct to its place in device memory, thereby actually
putting its device number into effect.  */
|
|
gomp_copy_host2dev (devicep, NULL, (void *) icv_var->start, icvs,
|
|
var_size, false, NULL);
|
|
splay_tree_key k = &array->key;
|
|
k->host_start = (uintptr_t) icvs;
|
|
k->host_end =
|
|
k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
|
|
k->tgt = tgt;
|
|
k->tgt_offset = icv_var->start;
|
|
k->refcount = REFCOUNT_INFINITY;
|
|
k->dynamic_refcount = 0;
|
|
k->aux = NULL;
|
|
array->left = NULL;
|
|
array->right = NULL;
|
|
splay_tree_insert (&devicep->mem_map, array);
|
|
array++;
|
|
}
|
|
|
|
free (target_table);
|
|
}
|
|
|
|
/* Unload the mappings described by target_data from device DEVICE_P.
|
|
The device must be locked. */
|
|
|
|
static void
|
|
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
|
|
unsigned version,
|
|
const void *host_table, const void *target_data)
|
|
{
|
|
void **host_func_table = ((void ***) host_table)[0];
|
|
void **host_funcs_end = ((void ***) host_table)[1];
|
|
void **host_var_table = ((void ***) host_table)[2];
|
|
void **host_vars_end = ((void ***) host_table)[3];
|
|
|
|
/* The func table contains only addresses, the var table contains addresses
|
|
and corresponding sizes. */
|
|
int num_funcs = host_funcs_end - host_func_table;
|
|
int num_vars = (host_vars_end - host_var_table) / 2;
|
|
|
|
struct splay_tree_key_s k;
|
|
splay_tree_key node = NULL;
|
|
|
|
/* Find mapping at start of node array */
|
|
if (num_funcs || num_vars)
|
|
{
|
|
k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
|
|
: (uintptr_t) host_var_table[0]);
|
|
k.host_end = k.host_start + 1;
|
|
node = splay_tree_lookup (&devicep->mem_map, &k);
|
|
}
|
|
|
|
if (!devicep->unload_image_func (devicep->target_id, version, target_data))
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("image unload fail");
|
|
}
|
|
if (devicep->mem_map_rev.root)
|
|
{
|
|
/* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
|
|
real allocation. */
|
|
assert (node && node->tgt && node->tgt->rev_array);
|
|
assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
|
|
free (node->tgt->rev_array);
|
|
devicep->mem_map_rev.root = NULL;
|
|
}
|
|
|
|
/* Remove mappings from splay tree. */
|
|
int i;
|
|
for (i = 0; i < num_funcs; i++)
|
|
{
|
|
k.host_start = (uintptr_t) host_func_table[i];
|
|
k.host_end = k.host_start + 1;
|
|
splay_tree_remove (&devicep->mem_map, &k);
|
|
}
|
|
|
|
/* Most significant bit of the size in host and target tables marks
|
|
"omp declare target link" variables. */
|
|
const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
|
|
const uintptr_t size_mask = ~link_bit;
|
|
bool is_tgt_unmapped = false;
|
|
|
|
for (i = 0; i < num_vars; i++)
|
|
{
|
|
k.host_start = (uintptr_t) host_var_table[i * 2];
|
|
k.host_end
|
|
= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
|
|
|
|
if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
|
|
splay_tree_remove (&devicep->mem_map, &k);
|
|
else
|
|
{
|
|
splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
|
|
is_tgt_unmapped = gomp_remove_var (devicep, n);
|
|
}
|
|
}
|
|
|
|
if (node && !is_tgt_unmapped)
|
|
{
|
|
free (node->tgt);
|
|
free (node);
|
|
}
|
|
}
|
|
|
|
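/* Write a comma-separated list of the 'requires' clauses set in
REQUIRES_MASK into the buffer BUF of SIZE bytes.  */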
static void
|
|
gomp_requires_to_name (char *buf, size_t size, int requires_mask)
|
|
{
|
|
char *end = buf + size, *p = buf;
|
|
if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
|
|
p += snprintf (p, end - p, "unified_address");
|
|
if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
|
|
p += snprintf (p, end - p, "%sunified_shared_memory",
|
|
(p == buf ? "" : ", "));
|
|
if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
|
|
p += snprintf (p, end - p, "%sreverse_offload",
|
|
(p == buf ? "" : ", "));
|
|
}
|
|
|
|
/* This function should be called from every offload image while loading.
|
|
It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
|
|
the target, and DATA. */
|
|
|
|
void
|
|
GOMP_offload_register_ver (unsigned version, const void *host_table,
|
|
int target_type, const void *data)
|
|
{
|
|
int i;
|
|
|
|
if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
|
|
gomp_fatal ("Library too old for offload (version %u < %u)",
|
|
GOMP_VERSION, GOMP_VERSION_LIB (version));
|
|
|
|
int omp_req;
|
|
const void *target_data;
|
|
if (GOMP_VERSION_LIB (version) > 1)
|
|
{
|
|
omp_req = (int) (size_t) ((void **) data)[0];
|
|
target_data = &((void **) data)[1];
|
|
}
|
|
else
|
|
{
|
|
omp_req = 0;
|
|
target_data = data;
|
|
}
|
|
|
|
gomp_mutex_lock (®ister_lock);
|
|
|
|
if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
|
|
{
|
|
char buf1[sizeof ("unified_address, unified_shared_memory, "
|
|
"reverse_offload")];
|
|
char buf2[sizeof ("unified_address, unified_shared_memory, "
|
|
"reverse_offload")];
|
|
gomp_requires_to_name (buf2, sizeof (buf2),
|
|
omp_req != GOMP_REQUIRES_TARGET_USED
|
|
? omp_req : omp_requires_mask);
|
|
if (omp_req != GOMP_REQUIRES_TARGET_USED
|
|
&& omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
|
|
{
|
|
gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
|
|
gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
|
|
"in multiple compilation units: '%s' vs. '%s'",
|
|
buf1, buf2);
|
|
}
|
|
else
|
|
gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
|
|
"some compilation units", buf2);
|
|
}
|
|
omp_requires_mask = omp_req;
|
|
|
|
/* Load image to all initialized devices. */
|
|
for (i = 0; i < num_devices; i++)
|
|
{
|
|
struct gomp_device_descr *devicep = &devices[i];
|
|
gomp_mutex_lock (&devicep->lock);
|
|
if (devicep->type == target_type
|
|
&& devicep->state == GOMP_DEVICE_INITIALIZED)
|
|
gomp_load_image_to_device (devicep, version,
|
|
host_table, target_data, true);
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
}
|
|
|
|
/* Insert image to array of pending images. */
|
|
offload_images
|
|
= gomp_realloc_unlock (offload_images,
|
|
(num_offload_images + 1)
|
|
* sizeof (struct offload_image_descr));
|
|
offload_images[num_offload_images].version = version;
|
|
offload_images[num_offload_images].type = target_type;
|
|
offload_images[num_offload_images].host_table = host_table;
|
|
offload_images[num_offload_images].target_data = target_data;
|
|
|
|
num_offload_images++;
|
|
gomp_mutex_unlock (®ister_lock);
|
|
}
|
|
|
|
/* Legacy entry point. */
|
|
|
|
void
|
|
GOMP_offload_register (const void *host_table, int target_type,
|
|
const void *target_data)
|
|
{
|
|
GOMP_offload_register_ver (0, host_table, target_type, target_data);
|
|
}
|
|
|
|
/* This function should be called from every offload image while unloading.
|
|
It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
|
|
the target, and DATA. */
|
|
|
|
void
|
|
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
|
|
int target_type, const void *data)
|
|
{
|
|
int i;
|
|
|
|
if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
|
|
gomp_fatal ("Library too old for offload (version %u < %u)",
|
|
GOMP_VERSION, GOMP_VERSION_LIB (version));
|
|
|
|
const void *target_data;
|
|
if (GOMP_VERSION_LIB (version) > 1)
|
|
target_data = &((void **) data)[1];
|
|
else
|
|
target_data = data;
|
|
|
|
gomp_mutex_lock (®ister_lock);
|
|
|
|
/* Unload image from all initialized devices. */
|
|
for (i = 0; i < num_devices; i++)
|
|
{
|
|
struct gomp_device_descr *devicep = &devices[i];
|
|
gomp_mutex_lock (&devicep->lock);
|
|
if (devicep->type == target_type
|
|
&& devicep->state == GOMP_DEVICE_INITIALIZED)
|
|
gomp_unload_image_from_device (devicep, version,
|
|
host_table, target_data);
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
}
|
|
|
|
/* Remove image from array of pending images. */
|
|
for (i = 0; i < num_offload_images; i++)
|
|
if (offload_images[i].target_data == target_data)
|
|
{
|
|
offload_images[i] = offload_images[--num_offload_images];
|
|
break;
|
|
}
|
|
|
|
gomp_mutex_unlock (®ister_lock);
|
|
}
|
|
|
|
/* Legacy entry point. */
|
|
|
|
void
|
|
GOMP_offload_unregister (const void *host_table, int target_type,
|
|
const void *target_data)
|
|
{
|
|
GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
|
|
}
|
|
|
|
/* This function initializes the target device, specified by DEVICEP. DEVICEP
|
|
must be locked on entry, and remains locked on return. */
|
|
|
|
attribute_hidden void
|
|
gomp_init_device (struct gomp_device_descr *devicep)
|
|
{
|
|
int i;
|
|
if (!devicep->init_device_func (devicep->target_id))
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("device initialization failed");
|
|
}
|
|
|
|
/* Load to the device all images registered up to this point.  */
|
|
for (i = 0; i < num_offload_images; i++)
|
|
{
|
|
struct offload_image_descr *image = &offload_images[i];
|
|
if (image->type == devicep->type)
|
|
gomp_load_image_to_device (devicep, image->version,
|
|
image->host_table, image->target_data,
|
|
false);
|
|
}
|
|
|
|
/* Initialize OpenACC asynchronous queues. */
|
|
goacc_init_asyncqueues (devicep);
|
|
|
|
devicep->state = GOMP_DEVICE_INITIALIZED;
|
|
}
|
|
|
|
/* This function finalizes the target device, specified by DEVICEP. DEVICEP
|
|
must be locked on entry, and remains locked on return. */
|
|
|
|
attribute_hidden bool
|
|
gomp_fini_device (struct gomp_device_descr *devicep)
|
|
{
|
|
bool ret = goacc_fini_asyncqueues (devicep);
|
|
ret &= devicep->fini_device_func (devicep->target_id);
|
|
devicep->state = GOMP_DEVICE_FINALIZED;
|
|
return ret;
|
|
}
|
|
|
|
attribute_hidden void
|
|
gomp_unload_device (struct gomp_device_descr *devicep)
|
|
{
|
|
if (devicep->state == GOMP_DEVICE_INITIALIZED)
|
|
{
|
|
unsigned i;
|
|
|
|
/* Unload from the device all images currently registered.  */
|
|
for (i = 0; i < num_offload_images; i++)
|
|
{
|
|
struct offload_image_descr *image = &offload_images[i];
|
|
if (image->type == devicep->type)
|
|
gomp_unload_image_from_device (devicep, image->version,
|
|
image->host_table,
|
|
image->target_data);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Host fallback for GOMP_target{,_ext} routines. */
|
|
|
|
static void
|
|
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
|
|
struct gomp_device_descr *devicep, void **args)
|
|
{
|
|
struct gomp_thread old_thr, *thr = gomp_thread ();
|
|
|
|
if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
|
|
&& devicep != NULL)
|
|
gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
|
|
"be used for offloading");
|
|
|
|
old_thr = *thr;
|
|
memset (thr, '\0', sizeof (*thr));
|
|
if (gomp_places_list)
|
|
{
|
|
thr->place = old_thr.place;
|
|
thr->ts.place_partition_len = gomp_places_list_len;
|
|
}
|
|
if (args)
|
|
while (*args)
|
|
{
|
|
intptr_t id = (intptr_t) *args++, val;
|
|
if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
|
|
val = (intptr_t) *args++;
|
|
else
|
|
val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
|
|
if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
|
|
continue;
|
|
id &= GOMP_TARGET_ARG_ID_MASK;
|
|
if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
|
|
continue;
|
|
val = val > INT_MAX ? INT_MAX : val;
|
|
if (val)
|
|
gomp_icv (true)->thread_limit_var = val;
|
|
break;
|
|
}
|
|
|
|
fn (hostaddrs);
|
|
gomp_free_thread (thr);
|
|
*thr = old_thr;
|
|
}
|
|
|
|
/* Calculate alignment and size requirements of a private copy of data shared
|
|
as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
|
|
|
|
static inline void
|
|
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
|
|
unsigned short *kinds, size_t *tgt_align,
|
|
size_t *tgt_size)
|
|
{
|
|
size_t i;
|
|
for (i = 0; i < mapnum; i++)
|
|
if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
|
|
{
|
|
size_t align = (size_t) 1 << (kinds[i] >> 8);
|
|
if (*tgt_align < align)
|
|
*tgt_align = align;
|
|
*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
|
|
*tgt_size += sizes[i];
|
|
}
|
|
}
|
|
|
|
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
|
|
|
|
static inline void
|
|
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
|
|
size_t *sizes, unsigned short *kinds, size_t tgt_align,
|
|
size_t tgt_size)
|
|
{
|
|
uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
|
|
if (al)
|
|
tgt += tgt_align - al;
|
|
tgt_size = 0;
|
|
size_t i;
|
|
for (i = 0; i < mapnum; i++)
|
|
if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
|
|
{
|
|
size_t align = (size_t) 1 << (kinds[i] >> 8);
|
|
tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
|
memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
|
|
hostaddrs[i] = tgt + tgt_size;
|
|
tgt_size = tgt_size + sizes[i];
|
|
if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
|
|
{
|
|
*(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
|
|
++i;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Helper function of GOMP_target{,_ext} routines. */
|
|
|
|
static void *
|
|
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
|
|
void (*host_fn) (void *))
|
|
{
|
|
if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
|
|
return (void *) host_fn;
|
|
else
|
|
{
|
|
gomp_mutex_lock (&devicep->lock);
|
|
if (devicep->state == GOMP_DEVICE_FINALIZED)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
return NULL;
|
|
}
|
|
|
|
struct splay_tree_key_s k;
|
|
k.host_start = (uintptr_t) host_fn;
|
|
k.host_end = k.host_start + 1;
|
|
splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
if (tgt_fn == NULL)
|
|
return NULL;
|
|
|
|
return (void *) tgt_fn->tgt_offset;
|
|
}
|
|
}
|
|
|
|
/* Called when encountering a target directive. If DEVICE
|
|
is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
|
|
GOMP_DEVICE_HOST_FALLBACK (or any value
|
|
larger than last available hw device), use host fallback.
|
|
FN is address of host code, UNUSED is part of the current ABI, but
|
|
we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
|
|
with MAPNUM entries, with addresses of the host objects,
|
|
sizes of the host objects (resp. for pointer kind pointer bias
|
|
and assumed sizeof (void *) size) and kinds. */
|
|
|
|
void
|
|
GOMP_target (int device, void (*fn) (void *), const void *unused,
|
|
size_t mapnum, void **hostaddrs, size_t *sizes,
|
|
unsigned char *kinds)
|
|
{
|
|
struct gomp_device_descr *devicep = resolve_device (device, true);
|
|
|
|
void *fn_addr;
|
|
if (devicep == NULL
|
|
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
|
/* All shared memory devices should use the GOMP_target_ext function. */
|
|
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
|
|
|| !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
|
|
return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
|
|
|
|
htab_t refcount_set = htab_create (mapnum);
|
|
struct target_mem_desc *tgt_vars
|
|
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
|
|
&refcount_set, GOMP_MAP_VARS_TARGET);
|
|
devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
|
|
NULL);
|
|
htab_clear (refcount_set);
|
|
gomp_unmap_vars (tgt_vars, true, &refcount_set);
|
|
htab_free (refcount_set);
|
|
}
|
|
|
|
static inline unsigned int
|
|
clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
|
|
{
|
|
/* If we cannot run asynchronously, simply ignore nowait. */
|
|
if (devicep != NULL && devicep->async_run_func == NULL)
|
|
flags &= ~GOMP_TARGET_FLAG_NOWAIT;
|
|
|
|
return flags;
|
|
}
|
|
|
|
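/* Copy the ICV struct for DEVICE back from device memory into its host
copy, provided it has been mapped on DEVICEP.  */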
static void
|
|
gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
|
|
{
|
|
struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
|
|
if (item == NULL)
|
|
return;
|
|
|
|
gomp_mutex_lock (&devicep->lock);
|
|
|
|
struct splay_tree_s *mem_map = &devicep->mem_map;
|
|
struct splay_tree_key_s cur_node;
|
|
void *dev_ptr = NULL;
|
|
|
|
void *host_ptr = &item->icvs;
|
|
cur_node.host_start = (uintptr_t) host_ptr;
|
|
cur_node.host_end = cur_node.host_start;
|
|
splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
|
|
|
|
if (n)
|
|
{
|
|
uintptr_t offset = cur_node.host_start - n->host_start;
|
|
dev_ptr = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
|
|
}
|
|
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
|
|
if (dev_ptr != NULL)
|
|
gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
|
|
sizeof (struct gomp_offload_icvs));
|
|
}
|
|
|
|
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
|
|
and several arguments have been added:
|
|
FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
|
|
DEPEND is array of dependencies, see GOMP_task for details.
|
|
|
|
ARGS is a pointer to an array consisting of a variable number of both
device-independent and device-specific arguments, each of which can take
one or two elements where the first specifies for which device it is
intended, the type and optionally also the value.  If the value is not
present in the first element, the whole second element is the actual
value.  The last element of the array is a single NULL.  Among the
device-independent arguments are, for example, NUM_TEAMS and THREAD_LIMIT.
|
|
|
|
NUM_TEAMS is positive if GOMP_teams will be called in the body with
|
|
that value, or 1 if teams construct is not present, or 0, if
|
|
teams construct does not have num_teams clause and so the choice is
|
|
implementation defined, and -1 if it can't be determined on the host
|
|
what value will GOMP_teams have on the device.
|
|
THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
|
|
body with that value, or 0, if teams construct does not have thread_limit
|
|
clause or the teams construct is not present, or -1 if it can't be
|
|
determined on the host what value will GOMP_teams have on the device. */
|
|
|
|
void
|
|
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
|
|
void **hostaddrs, size_t *sizes, unsigned short *kinds,
|
|
unsigned int flags, void **depend, void **args)
|
|
{
|
|
struct gomp_device_descr *devicep = resolve_device (device, true);
|
|
size_t tgt_align = 0, tgt_size = 0;
|
|
bool fpc_done = false;
|
|
|
|
/* Obtain the original TEAMS and THREADS values from ARGS. */
|
|
intptr_t orig_teams = 1, orig_threads = 0;
|
|
size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
|
|
void **tmpargs = args;
|
|
while (*tmpargs)
|
|
{
|
|
intptr_t id = (intptr_t) *tmpargs++, val;
|
|
if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
|
|
{
|
|
val = (intptr_t) *tmpargs++;
|
|
len = 2;
|
|
}
|
|
else
|
|
{
|
|
val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
|
|
len = 1;
|
|
}
|
|
num_args += len;
|
|
if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
|
|
continue;
|
|
val = val > INT_MAX ? INT_MAX : val;
|
|
if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
|
|
{
|
|
orig_teams = val;
|
|
teams_len = len;
|
|
}
|
|
else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
|
|
{
|
|
orig_threads = val;
|
|
threads_len = len;
|
|
}
|
|
}
|
|
|
|
intptr_t new_teams = orig_teams, new_threads = orig_threads;
|
|
/* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
|
|
ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
|
|
value could not be determined. No change.
|
|
ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
|
|
Set device-specific value.
|
|
ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
|
|
No change. */
|
|
if (orig_teams == -2)
|
|
new_teams = 1;
|
|
else if (orig_teams == 0)
|
|
{
|
|
struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
|
|
if (item != NULL)
|
|
new_teams = item->icvs.nteams;
|
|
}
|
|
/* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
|
|
region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
|
|
e.g. a THREAD_LIMIT clause. */
|
|
if (orig_teams > -2 && orig_threads == 0)
|
|
{
|
|
struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
|
|
if (item != NULL)
|
|
new_threads = item->icvs.teams_thread_limit;
|
|
}
|
|
|
|
/* Copy and change the arguments list only if TEAMS or THREADS need to be
|
|
updated. */
|
|
void **new_args = args;
|
|
if (orig_teams != new_teams || orig_threads != new_threads)
|
|
{
|
|
size_t tms_len = (orig_teams == new_teams
|
|
? teams_len
|
|
: (new_teams > -(1 << 15) && new_teams < (1 << 15)
|
|
? 1 : 2));
|
|
size_t ths_len = (orig_threads == new_threads
|
|
? threads_len
|
|
: (new_threads > -(1 << 15) && new_threads < (1 << 15)
|
|
? 1 : 2));
|
|
/* One additional item after the last arg must be NULL. */
|
|
size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
|
|
+ ths_len + 1;
|
|
new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
|
|
|
|
tmpargs = args;
|
|
void **tmp_new_args = new_args;
|
|
/* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
|
|
too if they have not been changed and skipped otherwise. */
|
|
while (*tmpargs)
|
|
{
|
|
intptr_t id = (intptr_t) *tmpargs;
|
|
if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
|
|
&& orig_teams != new_teams)
|
|
|| ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
|
|
&& orig_threads != new_threads))
|
|
{
|
|
tmpargs++;
|
|
if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
|
|
tmpargs++;
|
|
}
|
|
else
|
|
{
|
|
*tmp_new_args++ = *tmpargs++;
|
|
if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
|
|
*tmp_new_args++ = *tmpargs++;
|
|
}
|
|
}
|
|
|
|
/* Add the new TEAMS arg to the new args list if it has been changed. */
|
|
if (orig_teams != new_teams)
|
|
{
|
|
intptr_t new_val = new_teams;
|
|
if (tms_len == 1)
|
|
{
|
|
new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
|
|
| GOMP_TARGET_ARG_NUM_TEAMS;
|
|
*tmp_new_args++ = (void *) new_val;
|
|
}
|
|
else
|
|
{
|
|
*tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
|
|
| GOMP_TARGET_ARG_NUM_TEAMS);
|
|
*tmp_new_args++ = (void *) new_val;
|
|
}
|
|
}
|
|
|
|
/* Add the new THREADS arg to the new args list if it has been changed. */
|
|
if (orig_threads != new_threads)
|
|
{
|
|
intptr_t new_val = new_threads;
|
|
if (ths_len == 1)
|
|
{
|
|
new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
|
|
| GOMP_TARGET_ARG_THREAD_LIMIT;
|
|
*tmp_new_args++ = (void *) new_val;
|
|
}
|
|
else
|
|
{
|
|
*tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
|
|
| GOMP_TARGET_ARG_THREAD_LIMIT);
|
|
*tmp_new_args++ = (void *) new_val;
|
|
}
|
|
}
|
|
|
|
*tmp_new_args = NULL;
|
|
}
|
|
|
|
flags = clear_unsupported_flags (devicep, flags);
|
|
|
|
if (flags & GOMP_TARGET_FLAG_NOWAIT)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
/* Create a team if we don't have any around, as nowait
|
|
target tasks make sense to run asynchronously even when
|
|
outside of any parallel. */
|
|
if (__builtin_expect (thr->ts.team == NULL, 0))
|
|
{
|
|
struct gomp_team *team = gomp_new_team (1);
|
|
struct gomp_task *task = thr->task;
|
|
struct gomp_task **implicit_task = &task;
|
|
struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
|
|
team->prev_ts = thr->ts;
|
|
thr->ts.team = team;
|
|
thr->ts.team_id = 0;
|
|
thr->ts.work_share = &team->work_shares[0];
|
|
thr->ts.last_work_share = NULL;
|
|
#ifdef HAVE_SYNC_BUILTINS
|
|
thr->ts.single_count = 0;
|
|
#endif
|
|
thr->ts.static_trip = 0;
|
|
thr->task = &team->implicit_task[0];
|
|
gomp_init_task (thr->task, NULL, icv);
|
|
while (*implicit_task
|
|
&& (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
|
|
implicit_task = &(*implicit_task)->parent;
|
|
if (*implicit_task)
|
|
{
|
|
thr->task = *implicit_task;
|
|
gomp_end_task ();
|
|
free (*implicit_task);
|
|
thr->task = &team->implicit_task[0];
|
|
}
|
|
else
|
|
pthread_setspecific (gomp_thread_destructor, thr);
|
|
if (implicit_task != &task)
|
|
{
|
|
*implicit_task = thr->task;
|
|
thr->task = task;
|
|
}
|
|
}
|
|
if (thr->ts.team
|
|
&& !thr->task->final_task)
|
|
{
|
|
gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
|
|
sizes, kinds, flags, depend, new_args,
|
|
GOMP_TARGET_TASK_BEFORE_MAP);
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* If there are depend clauses, but nowait is not present
|
|
(or we are in a final task), block the parent task until the
|
|
dependencies are resolved and then just continue with the rest
|
|
of the function as if it is a merged task. */
|
|
if (depend != NULL)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
if (thr->task && thr->task->depend_hash)
|
|
{
|
|
/* If we might need to wait, copy firstprivate now. */
|
|
calculate_firstprivate_requirements (mapnum, sizes, kinds,
|
|
&tgt_align, &tgt_size);
|
|
if (tgt_align)
|
|
{
|
|
char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
|
|
copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
|
|
tgt_align, tgt_size);
|
|
}
|
|
fpc_done = true;
|
|
gomp_task_maybe_wait_for_dependencies (depend);
|
|
}
|
|
}
|
|
|
|
void *fn_addr;
|
|
if (devicep == NULL
|
|
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
|
|| !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
|
|
|| (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
|
|
{
|
|
if (!fpc_done)
|
|
{
|
|
calculate_firstprivate_requirements (mapnum, sizes, kinds,
|
|
&tgt_align, &tgt_size);
|
|
if (tgt_align)
|
|
{
|
|
char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
|
|
copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
|
|
tgt_align, tgt_size);
|
|
}
|
|
}
|
|
gomp_target_fallback (fn, hostaddrs, devicep, new_args);
|
|
return;
|
|
}
|
|
|
|
struct target_mem_desc *tgt_vars;
|
|
htab_t refcount_set = NULL;
|
|
|
|
if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
{
|
|
if (!fpc_done)
|
|
{
|
|
calculate_firstprivate_requirements (mapnum, sizes, kinds,
|
|
&tgt_align, &tgt_size);
|
|
if (tgt_align)
|
|
{
|
|
char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
|
|
copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
|
|
tgt_align, tgt_size);
|
|
}
|
|
}
|
|
tgt_vars = NULL;
|
|
}
|
|
else
|
|
{
|
|
refcount_set = htab_create (mapnum);
|
|
tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
|
|
true, &refcount_set, GOMP_MAP_VARS_TARGET);
|
|
}
|
|
devicep->run_func (devicep->target_id, fn_addr,
|
|
tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
|
|
new_args);
|
|
if (tgt_vars)
|
|
{
|
|
htab_clear (refcount_set);
|
|
gomp_unmap_vars (tgt_vars, true, &refcount_set);
|
|
}
|
|
if (refcount_set)
|
|
htab_free (refcount_set);
|
|
|
|
/* Copy back ICVs from device to host.
|
|
HOST_PTR is expected to exist since it was added in
|
|
gomp_load_image_to_device if not already available. */
|
|
gomp_copy_back_icvs (devicep, device);
|
|
|
|
}
|
|
|
|
|
|
/* Reverse lookup (device addr -> host addr) for reverse offload.  We avoid
keeping track of all variable handling, assuming that reverse offload
occurs only very rarely.  The downside is that the reverse search is
slow.  */
|
|
|
|
struct gomp_splay_tree_rev_lookup_data {
|
|
uintptr_t tgt_start;
|
|
uintptr_t tgt_end;
|
|
splay_tree_key key;
|
|
};
|
|
|
|
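/* Callback for splay_tree_foreach_lazy: if KEY's device address range
matches or overlaps the range recorded in D, store KEY there and return 1
to stop the traversal; otherwise return 0.  */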
static int
|
|
gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
|
|
{
|
|
struct gomp_splay_tree_rev_lookup_data *data;
|
|
data = (struct gomp_splay_tree_rev_lookup_data *)d;
|
|
uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
|
|
|
|
if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
|
|
return 0;
|
|
|
|
size_t j;
|
|
for (j = 0; j < key->tgt->list_count; j++)
|
|
if (key->tgt->list[j].key == key)
|
|
break;
|
|
assert (j < key->tgt->list_count);
|
|
uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
|
|
|
|
if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
|
|
|| (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
|
|
{
|
|
data->key = key;
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
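/* Find the host-side mapping whose device address range covers
[TGT_START, TGT_END).  For an empty range, the byte just after is tried
first and, unless ZERO_LEN, the byte just before as well.  */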
static inline splay_tree_key
|
|
gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
|
|
bool zero_len)
|
|
{
|
|
struct gomp_splay_tree_rev_lookup_data data;
|
|
data.key = NULL;
|
|
data.tgt_start = tgt_start;
|
|
data.tgt_end = tgt_end;
|
|
|
|
if (tgt_start != tgt_end)
|
|
{
|
|
splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
|
|
return data.key;
|
|
}
|
|
|
|
data.tgt_end++;
|
|
splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
|
|
if (data.key != NULL || zero_len)
|
|
return data.key;
|
|
data.tgt_end--;
|
|
|
|
data.tgt_start--;
|
|
splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
|
|
return data.key;
|
|
}
|
|
|
|
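/* Per-mapping bookkeeping for reverse offload: the original device address,
whether a host mapping was already present, and whether a temporary
aligned host buffer had to be allocated.  */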
struct cpy_data
|
|
{
|
|
uint64_t devaddr;
|
|
bool present, aligned;
|
|
};
|
|
|
|
|
|
/* Search the just-mapped reverse-offload data; return the index if found,
otherwise a value >= N.  */
|
|
|
|
static inline int
|
|
gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
|
|
unsigned short *kinds, uint64_t *sizes, size_t n,
|
|
uint64_t tgt_start, uint64_t tgt_end)
|
|
{
|
|
const bool short_mapkind = true;
|
|
const int typemask = short_mapkind ? 0xff : 0x7;
|
|
size_t i;
|
|
for (i = 0; i < n; i++)
|
|
{
|
|
bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
|
|
== GOMP_MAP_STRUCT);
|
|
uint64_t dev_end;
|
|
if (!is_struct)
|
|
dev_end = d[i].devaddr + sizes[i];
|
|
else
|
|
{
|
|
if (i + sizes[i] < n)
|
|
dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
|
|
else
|
|
dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
|
|
}
|
|
if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
|
|
|| (dev_end > tgt_start && d[i].devaddr < tgt_end))
|
|
break;
|
|
if (is_struct)
|
|
i += sizes[i];
|
|
}
|
|
return i;
|
|
}
|
|
|
|
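/* Wrapper around gomp_map_cdata_lookup_int that handles empty ranges the
same way as gomp_map_rev_lookup.  */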
static inline int
|
|
gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
|
|
unsigned short *kinds, uint64_t *sizes,
|
|
size_t n, uint64_t tgt_start, uint64_t tgt_end,
|
|
bool zero_len)
|
|
{
|
|
size_t i;
|
|
if (tgt_start != tgt_end)
|
|
return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
|
|
tgt_start, tgt_end);
|
|
tgt_end++;
|
|
i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
|
|
tgt_start, tgt_end);
|
|
if (i < n || zero_len)
|
|
return i;
|
|
tgt_end--;
|
|
|
|
tgt_start--;
|
|
return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
|
|
tgt_start, tgt_end);
|
|
}
|
|
|
|
/* Handle reverse offload.  This is called by the device plugins for a
reverse offload; it is not called if the outer target runs on the host.
The mapping is simplified: device-affecting constructs (except for target
with device(ancestor:1)) must not be encountered; in particular not
target (enter/exit) data.  */
|
|
|
|
void
|
|
gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
|
|
uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
|
|
struct goacc_asyncqueue *aq)
|
|
{
|
|
/* Return early if there is no offload code. */
|
|
if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
|
|
return;
|
|
/* Currently, this fails because of calculate_firstprivate_requirements
below; it could be fixed, but additional code would need to be updated to
handle 32-bit hosts - thus, it is not worthwhile.  */
|
|
if (sizeof (void *) != sizeof (uint64_t))
|
|
gomp_fatal ("Reverse offload of 32bit hosts not supported.");
|
|
|
|
struct cpy_data *cdata = NULL;
|
|
uint64_t *devaddrs;
|
|
uint64_t *sizes;
|
|
unsigned short *kinds;
|
|
const bool short_mapkind = true;
|
|
const int typemask = short_mapkind ? 0xff : 0x7;
|
|
struct gomp_device_descr *devicep = resolve_device (dev_num, false);
|
|
|
|
reverse_splay_tree_key n;
|
|
struct reverse_splay_tree_key_s k;
|
|
k.dev = fn_ptr;
|
|
|
|
gomp_mutex_lock (&devicep->lock);
|
|
n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
|
|
if (n == NULL)
|
|
gomp_fatal ("Cannot find reverse-offload function");
|
|
void (*host_fn) (void *) = (void (*) (void *)) n->k->host_start;
|
|
|
|
if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
|
|
{
|
|
devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
|
|
sizes = (uint64_t *) (uintptr_t) sizes_ptr;
|
|
kinds = (unsigned short *) (uintptr_t) kinds_ptr;
|
|
}
|
|
else
|
|
{
|
|
devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
|
|
sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
|
|
kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
|
|
gomp_copy_dev2host (devicep, aq, devaddrs,
|
|
(const void *) (uintptr_t) devaddrs_ptr,
|
|
mapnum * sizeof (uint64_t));
|
|
gomp_copy_dev2host (devicep, aq, sizes,
|
|
(const void *) (uintptr_t) sizes_ptr,
|
|
mapnum * sizeof (uint64_t));
|
|
gomp_copy_dev2host (devicep, aq, kinds,
|
|
(const void *) (uintptr_t) kinds_ptr,
|
|
mapnum * sizeof (unsigned short));
|
|
if (aq && !devicep->openacc.async.synchronize_func (aq))
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
|
|
size_t tgt_align = 0, tgt_size = 0;
|
|
|
|
/* If actually executed on 32bit systems, the casts lead to wrong code;
|
|
but 32bit with offloading is not supported; see top of this function. */
|
|
calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
|
|
(void *) (uintptr_t) kinds,
|
|
&tgt_align, &tgt_size);
|
|
|
|
if (tgt_align)
|
|
{
|
|
char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
|
|
uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
|
|
if (al)
|
|
tgt += tgt_align - al;
|
|
tgt_size = 0;
|
|
for (uint64_t i = 0; i < mapnum; i++)
|
|
if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
|
|
&& devaddrs[i] != 0)
|
|
{
|
|
size_t align = (size_t) 1 << (kinds[i] >> 8);
|
|
tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
|
if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
|
|
(size_t) sizes[i]);
|
|
else
|
|
{
|
|
gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
|
|
(void *) (uintptr_t) devaddrs[i],
|
|
(size_t) sizes[i]);
|
|
if (aq && !devicep->openacc.async.synchronize_func (aq))
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
|
|
tgt_size = tgt_size + sizes[i];
|
|
if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
&& i + 1 < mapnum
|
|
&& ((get_kind (short_mapkind, kinds, i) & typemask)
|
|
== GOMP_MAP_ATTACH))
|
|
{
|
|
*(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
|
|
= (uint64_t) devaddrs[i];
|
|
++i;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
|
|
{
|
|
size_t j, struct_cpy = 0;
|
|
splay_tree_key n2;
|
|
cdata = gomp_alloca (sizeof (*cdata) * mapnum);
|
|
memset (cdata, '\0', sizeof (*cdata) * mapnum);
|
|
gomp_mutex_lock (&devicep->lock);
|
|
for (uint64_t i = 0; i < mapnum; i++)
|
|
{
|
|
if (devaddrs[i] == 0)
|
|
continue;
|
|
n = NULL;
|
|
int kind = get_kind (short_mapkind, kinds, i) & typemask;
|
|
switch (kind)
|
|
{
|
|
case GOMP_MAP_FIRSTPRIVATE:
|
|
case GOMP_MAP_FIRSTPRIVATE_INT:
|
|
continue;
|
|
|
|
case GOMP_MAP_DELETE:
|
|
case GOMP_MAP_RELEASE:
|
|
case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
|
|
/* Assume it is present; look it up - but ignore unless the
|
|
present clause is there. */
|
|
case GOMP_MAP_ALLOC:
|
|
case GOMP_MAP_FROM:
|
|
case GOMP_MAP_FORCE_ALLOC:
|
|
case GOMP_MAP_FORCE_FROM:
|
|
case GOMP_MAP_ALWAYS_FROM:
|
|
case GOMP_MAP_TO:
|
|
case GOMP_MAP_TOFROM:
|
|
case GOMP_MAP_FORCE_TO:
|
|
case GOMP_MAP_FORCE_TOFROM:
|
|
case GOMP_MAP_ALWAYS_TO:
|
|
case GOMP_MAP_ALWAYS_TOFROM:
|
|
case GOMP_MAP_FORCE_PRESENT:
|
|
case GOMP_MAP_ALWAYS_PRESENT_FROM:
|
|
case GOMP_MAP_ALWAYS_PRESENT_TO:
|
|
case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
|
|
case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
|
|
cdata[i].devaddr = devaddrs[i];
|
|
bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
|
|
|| kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
|
|
j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
|
|
devaddrs[i],
|
|
devaddrs[i] + sizes[i], zero_len);
|
|
if (j < i)
|
|
{
|
|
n2 = NULL;
|
|
cdata[i].present = true;
|
|
devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
|
|
}
|
|
else
|
|
{
|
|
n2 = gomp_map_rev_lookup (&devicep->mem_map,
|
|
devaddrs[i],
|
|
devaddrs[i] + sizes[i], zero_len);
|
|
cdata[i].present = n2 != NULL;
|
|
}
|
|
if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
#ifdef HAVE_INTTYPES_H
|
|
gomp_fatal ("present clause: no corresponding data on "
|
|
"parent device at %p with size %"PRIu64,
|
|
(void *) (uintptr_t) devaddrs[i],
|
|
(uint64_t) sizes[i]);
|
|
#else
|
|
gomp_fatal ("present clause: no corresponding data on "
|
|
"parent device at %p with size %lu",
|
|
(void *) (uintptr_t) devaddrs[i],
|
|
(unsigned long) sizes[i]);
|
|
#endif
|
|
break;
|
|
}
|
|
else if (!cdata[i].present
|
|
&& kind != GOMP_MAP_DELETE
|
|
&& kind != GOMP_MAP_RELEASE
|
|
&& kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
|
|
{
|
|
cdata[i].aligned = true;
|
|
size_t align = (size_t) 1 << (kinds[i] >> 8);
|
|
devaddrs[i]
|
|
= (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
|
|
sizes[i]);
|
|
}
|
|
else if (n2 != NULL)
|
|
devaddrs[i] = (n2->host_start + cdata[i].devaddr
|
|
- (n2->tgt->tgt_start + n2->tgt_offset));
|
|
if (((!cdata[i].present || struct_cpy)
|
|
&& (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
|
|
|| kind == GOMP_MAP_FORCE_TO
|
|
|| kind == GOMP_MAP_FORCE_TOFROM
|
|
|| GOMP_MAP_ALWAYS_TO_P (kind))
|
|
{
|
|
gomp_copy_dev2host (devicep, aq,
|
|
(void *) (uintptr_t) devaddrs[i],
|
|
(void *) (uintptr_t) cdata[i].devaddr,
|
|
sizes[i]);
|
|
if (aq && !devicep->openacc.async.synchronize_func (aq))
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
}
|
|
if (struct_cpy)
|
|
struct_cpy--;
|
|
break;
|
|
case GOMP_MAP_ATTACH:
|
|
case GOMP_MAP_POINTER:
|
|
case GOMP_MAP_ALWAYS_POINTER:
|
|
n2 = gomp_map_rev_lookup (&devicep->mem_map,
|
|
devaddrs[i] + sizes[i],
|
|
devaddrs[i] + sizes[i]
|
|
+ sizeof (void*), false);
|
|
cdata[i].present = n2 != NULL;
|
|
cdata[i].devaddr = devaddrs[i];
|
|
if (n2)
|
|
devaddrs[i] = (n2->host_start + cdata[i].devaddr
|
|
- (n2->tgt->tgt_start + n2->tgt_offset));
|
|
else
|
|
{
|
|
j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
|
|
devaddrs[i] + sizes[i],
|
|
devaddrs[i] + sizes[i]
|
|
+ sizeof (void*), false);
|
|
if (j < i)
|
|
{
|
|
cdata[i].present = true;
|
|
devaddrs[i] = (devaddrs[j] + devaddrs[i]
|
|
- cdata[j].devaddr);
|
|
}
|
|
}
|
|
if (!cdata[i].present)
|
|
devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
|
|
/* Assume that when present, the pointer is already correct. */
|
|
if (!n2)
|
|
*(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
|
|
= devaddrs[i-1];
|
|
break;
|
|
case GOMP_MAP_TO_PSET:
|
|
/* Assume that when present, the pointers are fine and no 'to:'
|
|
is required. */
|
|
n2 = gomp_map_rev_lookup (&devicep->mem_map,
|
|
devaddrs[i], devaddrs[i] + sizes[i],
|
|
false);
|
|
cdata[i].present = n2 != NULL;
|
|
cdata[i].devaddr = devaddrs[i];
|
|
if (n2)
|
|
devaddrs[i] = (n2->host_start + cdata[i].devaddr
|
|
- (n2->tgt->tgt_start + n2->tgt_offset));
|
|
else
|
|
{
|
|
j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
|
|
devaddrs[i],
|
|
devaddrs[i] + sizes[i], false);
|
|
if (j < i)
|
|
{
|
|
cdata[i].present = true;
|
|
devaddrs[i] = (devaddrs[j] + devaddrs[i]
|
|
- cdata[j].devaddr);
|
|
}
|
|
}
|
|
if (!cdata[i].present)
|
|
{
|
|
cdata[i].aligned = true;
|
|
size_t align = (size_t) 1 << (kinds[i] >> 8);
|
|
devaddrs[i]
|
|
= (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
|
|
sizes[i]);
|
|
gomp_copy_dev2host (devicep, aq,
|
|
(void *) (uintptr_t) devaddrs[i],
|
|
(void *) (uintptr_t) cdata[i].devaddr,
|
|
sizes[i]);
|
|
if (aq && !devicep->openacc.async.synchronize_func (aq))
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
}
|
|
for (j = i + 1; j < mapnum; j++)
|
|
{
|
|
kind = get_kind (short_mapkind, kinds, j) & typemask;
|
|
if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
|
|
&& !GOMP_MAP_POINTER_P (kind))
|
|
break;
|
|
if (devaddrs[j] < devaddrs[i])
|
|
break;
|
|
if (cdata[i].present)
|
|
continue;
|
|
if (devaddrs[j] == 0)
|
|
{
|
|
*(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
|
|
continue;
|
|
}
|
|
int k;
|
|
n2 = NULL;
|
|
/* Dereference devaddrs[j] to get the device addr. */
|
|
assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
|
|
devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
|
|
+ sizes[j]);
|
|
cdata[j].present = true;
|
|
cdata[j].devaddr = devaddrs[j];
|
|
if (devaddrs[j] == 0)
|
|
continue;
|
|
k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
|
|
devaddrs[j],
|
|
devaddrs[j] + sizeof (void*),
|
|
false);
|
|
if (k < j)
|
|
devaddrs[j] = (devaddrs[k] + devaddrs[j]
|
|
- cdata[k].devaddr);
|
|
else
|
|
{
|
|
n2 = gomp_map_rev_lookup (&devicep->mem_map,
|
|
devaddrs[j],
|
|
devaddrs[j] + sizeof (void*),
|
|
false);
|
|
if (n2 == NULL)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("Pointer target wasn't mapped");
|
|
}
|
|
devaddrs[j] = (n2->host_start + cdata[j].devaddr
|
|
- (n2->tgt->tgt_start + n2->tgt_offset));
|
|
}
|
|
*(void **) (uintptr_t) (devaddrs[i] + sizes[j])
|
|
= (void *) (uintptr_t) devaddrs[j];
|
|
}
|
|
i = j -1;
|
|
break;
|
|
case GOMP_MAP_STRUCT:
|
|
n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
|
|
devaddrs[i + sizes[i]]
|
|
+ sizes[i + sizes[i]], false);
|
|
cdata[i].present = n2 != NULL;
|
|
cdata[i].devaddr = devaddrs[i];
|
|
struct_cpy = cdata[i].present ? 0 : sizes[i];
|
|
if (!n2)
|
|
{
|
|
size_t sz = (size_t) (devaddrs[i + sizes[i]]
|
|
- devaddrs[i+1]
|
|
+ sizes[i + sizes[i]]);
|
|
size_t align = (size_t) 1 << (kinds[i] >> 8);
|
|
cdata[i].aligned = true;
|
|
devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
|
|
devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
|
|
}
|
|
else
|
|
devaddrs[i] = (n2->host_start + cdata[i].devaddr
|
|
- (n2->tgt->tgt_start + n2->tgt_offset));
|
|
break;
|
|
default:
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
|
|
}
|
|
}
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
}
|
|
|
|
host_fn (devaddrs);
|
|
|
|
if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
|
|
{
|
|
uint64_t struct_cpy = 0;
|
|
bool clean_struct = false;
|
|
for (uint64_t i = 0; i < mapnum; i++)
|
|
{
|
|
if (cdata[i].devaddr == 0)
|
|
continue;
|
|
int kind = get_kind (short_mapkind, kinds, i) & typemask;
|
|
bool copy = !cdata[i].present || struct_cpy;
|
|
switch (kind)
|
|
{
|
|
case GOMP_MAP_FORCE_FROM:
|
|
case GOMP_MAP_FORCE_TOFROM:
|
|
case GOMP_MAP_ALWAYS_FROM:
|
|
case GOMP_MAP_ALWAYS_TOFROM:
|
|
case GOMP_MAP_ALWAYS_PRESENT_FROM:
|
|
case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
|
|
copy = true;
|
|
/* FALLTHRU */
|
|
case GOMP_MAP_FROM:
|
|
case GOMP_MAP_TOFROM:
|
|
if (copy)
|
|
{
|
|
gomp_copy_host2dev (devicep, aq,
|
|
(void *) (uintptr_t) cdata[i].devaddr,
|
|
(void *) (uintptr_t) devaddrs[i],
|
|
sizes[i], false, NULL);
|
|
if (aq && !devicep->openacc.async.synchronize_func (aq))
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
if (struct_cpy)
|
|
{
|
|
struct_cpy--;
|
|
continue;
|
|
}
|
|
if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
|
|
{
|
|
clean_struct = true;
|
|
struct_cpy = sizes[i];
|
|
}
|
|
else if (!cdata[i].present && cdata[i].aligned)
|
|
gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
|
|
else if (!cdata[i].present)
|
|
free ((void *) (uintptr_t) devaddrs[i]);
|
|
}
|
|
if (clean_struct)
|
|
for (uint64_t i = 0; i < mapnum; i++)
|
|
if (!cdata[i].present
|
|
&& ((get_kind (short_mapkind, kinds, i) & typemask)
|
|
== GOMP_MAP_STRUCT))
|
|
{
|
|
devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
|
|
gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
|
|
}
|
|
|
|
free (devaddrs);
|
|
free (sizes);
|
|
free (kinds);
|
|
}
|
|
}
|
|
|
|
/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (struct gomp_device_descr *devicep)
{
  struct gomp_task_icv *icv = gomp_icv (false);

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 NULL, GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true, NULL);
    }
}

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

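/* Illustrative usage sketch (not part of libgomp): a user-level 'target
   update' directive is what ends up calling one of the GOMP_target_update*
   entry points; the assumed array name and sizes below are only examples.

     int x[1024];
     #pragma omp target enter data map(to: x)
     x[0] = 42;                            // modified on the host
     #pragma omp target update to(x)       // refresh the device copy
     #pragma omp target
     { x[0] += 1; }
     #pragma omp target update from(x)     // copy back to the host

   Whether the classic entry point above or the _ext variant below is used
   depends on the GCC version that compiled the user code.  */
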
void
|
|
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
|
|
size_t *sizes, unsigned short *kinds,
|
|
unsigned int flags, void **depend)
|
|
{
|
|
struct gomp_device_descr *devicep = resolve_device (device, true);
|
|
|
|
/* If there are depend clauses, but nowait is not present,
|
|
block the parent task until the dependencies are resolved
|
|
and then just continue with the rest of the function as if it
|
|
is a merged task. Until we are able to schedule task during
|
|
variable mapping or unmapping, ignore nowait if depend clauses
|
|
are not present. */
|
|
if (depend != NULL)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
if (thr->task && thr->task->depend_hash)
|
|
{
|
|
if ((flags & GOMP_TARGET_FLAG_NOWAIT)
|
|
&& thr->ts.team
|
|
&& !thr->task->final_task)
|
|
{
|
|
if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
|
|
mapnum, hostaddrs, sizes, kinds,
|
|
flags | GOMP_TARGET_FLAG_UPDATE,
|
|
depend, NULL, GOMP_TARGET_TASK_DATA))
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
struct gomp_team *team = thr->ts.team;
|
|
/* If parallel or taskgroup has been cancelled, don't start new
|
|
tasks. */
|
|
if (__builtin_expect (gomp_cancel_var, 0) && team)
|
|
{
|
|
if (gomp_team_barrier_cancelled (&team->barrier))
|
|
return;
|
|
if (thr->task->taskgroup)
|
|
{
|
|
if (thr->task->taskgroup->cancelled)
|
|
return;
|
|
if (thr->task->taskgroup->workshare
|
|
&& thr->task->taskgroup->prev
|
|
&& thr->task->taskgroup->prev->cancelled)
|
|
return;
|
|
}
|
|
}
|
|
|
|
gomp_task_maybe_wait_for_dependencies (depend);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (devicep == NULL
|
|
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
|
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
return;
|
|
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
struct gomp_team *team = thr->ts.team;
|
|
/* If parallel or taskgroup has been cancelled, don't start new tasks. */
|
|
if (__builtin_expect (gomp_cancel_var, 0) && team)
|
|
{
|
|
if (gomp_team_barrier_cancelled (&team->barrier))
|
|
return;
|
|
if (thr->task->taskgroup)
|
|
{
|
|
if (thr->task->taskgroup->cancelled)
|
|
return;
|
|
if (thr->task->taskgroup->workshare
|
|
&& thr->task->taskgroup->prev
|
|
&& thr->task->taskgroup->prev->cancelled)
|
|
return;
|
|
}
|
|
}
|
|
|
|
gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
|
|
}
|
|
|
|
static void
|
|
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
|
|
void **hostaddrs, size_t *sizes, unsigned short *kinds,
|
|
htab_t *refcount_set)
|
|
{
|
|
const int typemask = 0xff;
|
|
size_t i;
|
|
gomp_mutex_lock (&devicep->lock);
|
|
if (devicep->state == GOMP_DEVICE_FINALIZED)
|
|
{
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
return;
|
|
}
|
|
|
|
for (i = 0; i < mapnum; i++)
|
|
if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
|
|
{
|
|
struct splay_tree_key_s cur_node;
|
|
cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
cur_node.host_end = cur_node.host_start + sizeof (void *);
|
|
splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
|
|
|
|
if (n)
|
|
gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
|
|
false, NULL);
|
|
}
|
|
|
|
int nrmvars = 0;
|
|
splay_tree_key remove_vars[mapnum];
|
|
|
|
for (i = 0; i < mapnum; i++)
|
|
{
|
|
struct splay_tree_key_s cur_node;
|
|
unsigned char kind = kinds[i] & typemask;
|
|
switch (kind)
|
|
{
|
|
case GOMP_MAP_FROM:
|
|
case GOMP_MAP_ALWAYS_FROM:
|
|
case GOMP_MAP_DELETE:
|
|
case GOMP_MAP_RELEASE:
|
|
case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
|
|
case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
|
|
cur_node.host_start = (uintptr_t) hostaddrs[i];
|
|
cur_node.host_end = cur_node.host_start + sizes[i];
|
|
splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
|
|
|| kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
|
|
? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
|
|
: splay_tree_lookup (&devicep->mem_map, &cur_node);
|
|
if (!k)
|
|
continue;
|
|
|
|
bool delete_p = (kind == GOMP_MAP_DELETE
|
|
|| kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
|
|
bool do_copy, do_remove;
|
|
gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
|
|
&do_remove);
|
|
|
|
if ((kind == GOMP_MAP_FROM && do_copy)
|
|
|| kind == GOMP_MAP_ALWAYS_FROM)
|
|
{
|
|
if (k->aux && k->aux->attach_count)
|
|
{
|
|
/* We have to be careful not to overwrite still attached
|
|
pointers during the copyback to host. */
|
|
uintptr_t addr = k->host_start;
|
|
while (addr < k->host_end)
|
|
{
|
|
size_t i = (addr - k->host_start) / sizeof (void *);
|
|
if (k->aux->attach_count[i] == 0)
|
|
gomp_copy_dev2host (devicep, NULL, (void *) addr,
|
|
(void *) (k->tgt->tgt_start
|
|
+ k->tgt_offset
|
|
+ addr - k->host_start),
|
|
sizeof (void *));
|
|
addr += sizeof (void *);
|
|
}
|
|
}
|
|
else
|
|
gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
|
|
(void *) (k->tgt->tgt_start + k->tgt_offset
|
|
+ cur_node.host_start
|
|
- k->host_start),
|
|
cur_node.host_end - cur_node.host_start);
|
|
}
|
|
|
|
/* Structure elements lists are removed altogether at once, which
|
|
may cause immediate deallocation of the target_mem_desc, causing
|
|
errors if we still have following element siblings to copy back.
|
|
While we're at it, it also seems more disciplined to simply
|
|
queue all removals together for processing below.
|
|
|
|
Structured block unmapping (i.e. gomp_unmap_vars_internal) should
|
|
not have this problem, since they maintain an additional
|
|
tgt->refcount = 1 reference to the target_mem_desc to start with.
|
|
*/
|
|
if (do_remove)
|
|
remove_vars[nrmvars++] = k;
|
|
break;
|
|
|
|
case GOMP_MAP_DETACH:
|
|
break;
|
|
default:
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
|
|
kind);
|
|
}
|
|
}
|
|
|
|
for (int i = 0; i < nrmvars; i++)
|
|
gomp_remove_var (devicep, remove_vars[i]);
|
|
|
|
gomp_mutex_unlock (&devicep->lock);
|
|
}
|
|
|
|
void
|
|
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
|
|
size_t *sizes, unsigned short *kinds,
|
|
unsigned int flags, void **depend)
|
|
{
|
|
struct gomp_device_descr *devicep = resolve_device (device, true);
|
|
|
|
/* If there are depend clauses, but nowait is not present,
|
|
block the parent task until the dependencies are resolved
|
|
and then just continue with the rest of the function as if it
|
|
is a merged task. Until we are able to schedule task during
|
|
variable mapping or unmapping, ignore nowait if depend clauses
|
|
are not present. */
|
|
if (depend != NULL)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
if (thr->task && thr->task->depend_hash)
|
|
{
|
|
if ((flags & GOMP_TARGET_FLAG_NOWAIT)
|
|
&& thr->ts.team
|
|
&& !thr->task->final_task)
|
|
{
|
|
if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
|
|
mapnum, hostaddrs, sizes, kinds,
|
|
flags, depend, NULL,
|
|
GOMP_TARGET_TASK_DATA))
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
struct gomp_team *team = thr->ts.team;
|
|
/* If parallel or taskgroup has been cancelled, don't start new
|
|
tasks. */
|
|
if (__builtin_expect (gomp_cancel_var, 0) && team)
|
|
{
|
|
if (gomp_team_barrier_cancelled (&team->barrier))
|
|
return;
|
|
if (thr->task->taskgroup)
|
|
{
|
|
if (thr->task->taskgroup->cancelled)
|
|
return;
|
|
if (thr->task->taskgroup->workshare
|
|
&& thr->task->taskgroup->prev
|
|
&& thr->task->taskgroup->prev->cancelled)
|
|
return;
|
|
}
|
|
}
|
|
|
|
gomp_task_maybe_wait_for_dependencies (depend);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (devicep == NULL
|
|
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
|
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
return;
|
|
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
struct gomp_team *team = thr->ts.team;
|
|
/* If parallel or taskgroup has been cancelled, don't start new tasks. */
|
|
if (__builtin_expect (gomp_cancel_var, 0) && team)
|
|
{
|
|
if (gomp_team_barrier_cancelled (&team->barrier))
|
|
return;
|
|
if (thr->task->taskgroup)
|
|
{
|
|
if (thr->task->taskgroup->cancelled)
|
|
return;
|
|
if (thr->task->taskgroup->workshare
|
|
&& thr->task->taskgroup->prev
|
|
&& thr->task->taskgroup->prev->cancelled)
|
|
return;
|
|
}
|
|
}
|
|
|
|
htab_t refcount_set = htab_create (mapnum);
|
|
|
|
/* The variables are mapped separately such that they can be released
|
|
independently. */
|
|
size_t i, j;
|
|
if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
|
|
for (i = 0; i < mapnum; i++)
|
|
if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT
|
|
|| (kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD)
|
|
{
|
|
gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
|
|
&kinds[i], true, &refcount_set,
|
|
GOMP_MAP_VARS_ENTER_DATA);
|
|
i += sizes[i];
|
|
}
|
|
else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
|
|
{
|
|
for (j = i + 1; j < mapnum; j++)
|
|
if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
|
|
&& !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
|
|
break;
|
|
gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
|
|
&kinds[i], true, &refcount_set,
|
|
GOMP_MAP_VARS_ENTER_DATA);
|
|
i += j - i - 1;
|
|
}
|
|
else if (i + 1 < mapnum
|
|
&& ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
|
|
|| ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
|
|
&& (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
|
|
{
|
|
/* An attach operation must be processed together with the mapped
|
|
base-pointer list item. */
|
|
gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
|
|
true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
|
|
i += 1;
|
|
}
|
|
else
|
|
gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
|
|
true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
|
|
else
|
|
gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
|
|
htab_free (refcount_set);
|
|
}
|
|
|
|
bool
|
|
gomp_target_task_fn (void *data)
|
|
{
|
|
struct gomp_target_task *ttask = (struct gomp_target_task *) data;
|
|
struct gomp_device_descr *devicep = ttask->devicep;
|
|
|
|
if (ttask->fn != NULL)
|
|
{
|
|
void *fn_addr;
|
|
if (devicep == NULL
|
|
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
|
|| !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
|
|
|| (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
|
|
{
|
|
ttask->state = GOMP_TARGET_TASK_FALLBACK;
|
|
gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
|
|
ttask->args);
|
|
return false;
|
|
}
|
|
|
|
if (ttask->state == GOMP_TARGET_TASK_FINISHED)
|
|
{
|
|
if (ttask->tgt)
|
|
gomp_unmap_vars (ttask->tgt, true, NULL);
|
|
return false;
|
|
}
|
|
|
|
void *actual_arguments;
|
|
if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
{
|
|
ttask->tgt = NULL;
|
|
actual_arguments = ttask->hostaddrs;
|
|
}
|
|
else
|
|
{
|
|
ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
|
|
NULL, ttask->sizes, ttask->kinds, true,
|
|
NULL, GOMP_MAP_VARS_TARGET);
|
|
actual_arguments = (void *) ttask->tgt->tgt_start;
|
|
}
|
|
ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
|
|
|
|
assert (devicep->async_run_func);
|
|
devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
|
|
ttask->args, (void *) ttask);
|
|
return true;
|
|
}
|
|
else if (devicep == NULL
|
|
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
|
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
return false;
|
|
|
|
size_t i;
|
|
if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
|
|
gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
|
|
ttask->kinds, true);
|
|
else
|
|
{
|
|
htab_t refcount_set = htab_create (ttask->mapnum);
|
|
if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
|
|
for (i = 0; i < ttask->mapnum; i++)
|
|
if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT
|
|
|| (ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT_UNORD)
|
|
{
|
|
gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
|
|
NULL, &ttask->sizes[i], &ttask->kinds[i], true,
|
|
&refcount_set, GOMP_MAP_VARS_ENTER_DATA);
|
|
i += ttask->sizes[i];
|
|
}
|
|
else
|
|
gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
|
|
&ttask->kinds[i], true, &refcount_set,
|
|
GOMP_MAP_VARS_ENTER_DATA);
|
|
else
|
|
gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
|
|
ttask->kinds, &refcount_set);
|
|
htab_free (refcount_set);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/* Implement OpenMP 'teams' construct, legacy entry point.  */

void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}

/* Implement OpenMP 'teams' construct.

   Initialize upon FIRST call.  Return whether this invocation is active.
   Depending on whether NUM_TEAMS_LOW asks for more teams than are provided
   in hardware, we may need to loop multiple times; in that case make sure
   to update the team-level variable used by 'omp_get_team_num'.  */

bool
GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
	     unsigned int thread_limit, bool first)
{
  struct gomp_thread *thr = gomp_thread ();
  if (first)
    {
      if (thread_limit)
	{
	  struct gomp_task_icv *icv = gomp_icv (true);
	  icv->thread_limit_var
	    = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
	}
      (void) num_teams_high;
      if (num_teams_low == 0)
	num_teams_low = 1;
      thr->num_teams = num_teams_low - 1;
      thr->team_num = 0;
    }
  else if (thr->team_num == thr->num_teams)
    return false;
  else
    ++thr->team_num;
  return true;
}

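/* Illustrative sketch (not part of libgomp) of the calling pattern behind
   GOMP_teams4: code generated for a 'teams num_teams(8)' construct keeps
   re-entering the teams body while GOMP_teams4 returns true, so that
   omp_get_team_num cycles through all requested team numbers even when
   fewer teams exist in hardware.  The exact code GCC emits differs; this
   only shows the intended retry-loop shape.

     for (bool first = true;
	  GOMP_teams4 (8, 8, 0, first);
	  first = false)
       {
	 ... teams body, observing omp_get_team_num () ...
       }
*/
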
void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return malloc (size);

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    {
      free (device_ptr);
      return;
    }

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL || device_ptr == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}

int
omp_target_is_present (const void *ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return 1;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return 0;

  if (ptr == NULL)
    return 1;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

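/* Illustrative usage sketch (not part of libgomp); the array name and size
   are only examples:

     int a[256];
     #pragma omp target enter data map(to: a)
     assert (omp_target_is_present (a, omp_get_default_device ()));
     #pragma omp target exit data map(delete: a)

   The splay-tree lookup above is what answers that query on devices
   without shared memory.  */
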
static int
omp_target_memcpy_check (int dst_device_num, int src_device_num,
			 struct gomp_device_descr **dst_devicep,
			 struct gomp_device_descr **src_devicep)
{
  if (dst_device_num != gomp_get_num_devices ()
      /* Above gomp_get_num_devices has to be called unconditionally.  */
      && dst_device_num != omp_initial_device)
    {
      *dst_devicep = resolve_device (dst_device_num, false);
      if (*dst_devicep == NULL)
	return EINVAL;

      if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	*dst_devicep = NULL;
    }

  if (src_device_num != num_devices_openmp
      && src_device_num != omp_initial_device)
    {
      *src_devicep = resolve_device (src_device_num, false);
      if (*src_devicep == NULL)
	return EINVAL;

      if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	*src_devicep = NULL;
    }

  return 0;
}

static int
omp_target_memcpy_copy (void *dst, const void *src, size_t length,
			size_t dst_offset, size_t src_offset,
			struct gomp_device_descr *dst_devicep,
			struct gomp_device_descr *src_devicep)
{
  bool ret;
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}

int
omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
		   size_t src_offset, int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     &dst_devicep, &src_devicep);

  if (ret)
    return ret;

  ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
				dst_devicep, src_devicep);

  return ret;
}

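/* Illustrative usage sketch (not part of libgomp) combining
   omp_target_alloc, omp_target_memcpy and omp_target_free from above;
   'dev' is assumed to be a valid non-host device number:

     int dev = omp_get_default_device ();
     int host = omp_get_initial_device ();
     double buf[100] = { 0 };
     void *d_buf = omp_target_alloc (sizeof buf, dev);
     if (d_buf
	 && omp_target_memcpy (d_buf, buf, sizeof buf, 0, 0, dev, host) == 0)
       {
	 #pragma omp target is_device_ptr(d_buf) device(dev)
	 ((double *) d_buf)[0] += 1.0;
	 omp_target_memcpy (buf, d_buf, sizeof buf, 0, 0, host, dev);
       }
     omp_target_free (d_buf, dev);
*/
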
typedef struct
{
  void *dst;
  const void *src;
  size_t length;
  size_t dst_offset;
  size_t src_offset;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
} omp_target_memcpy_data;

static void
omp_target_memcpy_async_helper (void *args)
{
  omp_target_memcpy_data *a = args;
  if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
			      a->src_offset, a->dst_devicep, a->src_devicep))
    gomp_fatal ("omp_target_memcpy failed");
}

int
omp_target_memcpy_async (void *dst, const void *src, size_t length,
			 size_t dst_offset, size_t src_offset,
			 int dst_device_num, int src_device_num,
			 int depobj_count, omp_depend_t *depobj_list)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  unsigned int flags = 0;
  void *depend[depobj_count + 5];
  int i;
  int check = omp_target_memcpy_check (dst_device_num, src_device_num,
				       &dst_devicep, &src_devicep);

  omp_target_memcpy_data s = {
    .dst = dst,
    .src = src,
    .length = length,
    .dst_offset = dst_offset,
    .src_offset = src_offset,
    .dst_devicep = dst_devicep,
    .src_devicep = src_devicep
  };

  if (check)
    return check;

  if (depobj_count > 0 && depobj_list != NULL)
    {
      flags |= GOMP_TASK_FLAG_DEPEND;
      depend[0] = 0;
      depend[1] = (void *) (uintptr_t) depobj_count;
      depend[2] = depend[3] = depend[4] = 0;
      for (i = 0; i < depobj_count; ++i)
	depend[i + 5] = &depobj_list[i];
    }

  GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
	     __alignof__ (s), true, flags, depend, 0, NULL);

  return 0;
}

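/* Illustrative note (not part of libgomp): with no dependence objects the
   routine above simply enqueues the copy as a task, so a caller typically
   waits for it explicitly; names below are assumptions from the earlier
   sketch:

     omp_target_memcpy_async (d_buf, buf, sizeof buf, 0, 0, dev, host,
			      0, NULL);
     #pragma omp taskwait    // wait for the asynchronous copy to finish

   With DEPOBJ_COUNT > 0 the depend vector built above follows the layout
   GOMP_task expects when GOMP_TASK_FLAG_DEPEND is set: slot 1 holds the
   number of depend objects and slots 5 onwards point at them.  */
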
static int
|
|
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
|
|
int num_dims, const size_t *volume,
|
|
const size_t *dst_offsets,
|
|
const size_t *src_offsets,
|
|
const size_t *dst_dimensions,
|
|
const size_t *src_dimensions,
|
|
struct gomp_device_descr *dst_devicep,
|
|
struct gomp_device_descr *src_devicep,
|
|
size_t *tmp_size, void **tmp)
|
|
{
|
|
size_t dst_slice = element_size;
|
|
size_t src_slice = element_size;
|
|
size_t j, dst_off, src_off, length;
|
|
int i, ret;
|
|
|
|
if (num_dims == 1)
|
|
{
|
|
if (__builtin_mul_overflow (element_size, volume[0], &length)
|
|
|| __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
|
|
|| __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
|
|
return EINVAL;
|
|
if (dst_devicep == NULL && src_devicep == NULL)
|
|
{
|
|
memcpy ((char *) dst + dst_off, (const char *) src + src_off,
|
|
length);
|
|
ret = 1;
|
|
}
|
|
else if (src_devicep == NULL)
|
|
ret = dst_devicep->host2dev_func (dst_devicep->target_id,
|
|
(char *) dst + dst_off,
|
|
(const char *) src + src_off,
|
|
length);
|
|
else if (dst_devicep == NULL)
|
|
ret = src_devicep->dev2host_func (src_devicep->target_id,
|
|
(char *) dst + dst_off,
|
|
(const char *) src + src_off,
|
|
length);
|
|
else if (src_devicep == dst_devicep)
|
|
ret = src_devicep->dev2dev_func (src_devicep->target_id,
|
|
(char *) dst + dst_off,
|
|
(const char *) src + src_off,
|
|
length);
|
|
else
|
|
{
|
|
if (*tmp_size == 0)
|
|
{
|
|
*tmp_size = length;
|
|
*tmp = malloc (length);
|
|
if (*tmp == NULL)
|
|
return ENOMEM;
|
|
}
|
|
else if (*tmp_size < length)
|
|
{
|
|
*tmp_size = length;
|
|
free (*tmp);
|
|
*tmp = malloc (length);
|
|
if (*tmp == NULL)
|
|
return ENOMEM;
|
|
}
|
|
ret = src_devicep->dev2host_func (src_devicep->target_id, *tmp,
|
|
(const char *) src + src_off,
|
|
length);
|
|
if (ret == 1)
|
|
ret = dst_devicep->host2dev_func (dst_devicep->target_id,
|
|
(char *) dst + dst_off, *tmp,
|
|
length);
|
|
}
|
|
return ret ? 0 : EINVAL;
|
|
}
|
|
|
|
/* host->device, device->host and intra device. */
|
|
if (num_dims == 2
|
|
&& ((src_devicep
|
|
&& src_devicep == dst_devicep
|
|
&& src_devicep->memcpy2d_func)
|
|
|| (!src_devicep != !dst_devicep
|
|
&& ((src_devicep && src_devicep->memcpy2d_func)
|
|
|| (dst_devicep && dst_devicep->memcpy2d_func)))))
|
|
{
|
|
size_t vol_sz1, dst_sz1, src_sz1, dst_off_sz1, src_off_sz1;
|
|
int dst_id = dst_devicep ? dst_devicep->target_id : -1;
|
|
int src_id = src_devicep ? src_devicep->target_id : -1;
|
|
struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
|
|
|
|
if (__builtin_mul_overflow (volume[1], element_size, &vol_sz1)
|
|
|| __builtin_mul_overflow (dst_dimensions[1], element_size, &dst_sz1)
|
|
|| __builtin_mul_overflow (src_dimensions[1], element_size, &src_sz1)
|
|
|| __builtin_mul_overflow (dst_offsets[1], element_size, &dst_off_sz1)
|
|
|| __builtin_mul_overflow (src_offsets[1], element_size,
|
|
&src_off_sz1))
|
|
return EINVAL;
|
|
ret = devp->memcpy2d_func (dst_id, src_id, vol_sz1, volume[0],
|
|
dst, dst_off_sz1, dst_offsets[0], dst_sz1,
|
|
src, src_off_sz1, src_offsets[0], src_sz1);
|
|
if (ret != -1)
|
|
return ret ? 0 : EINVAL;
|
|
}
|
|
else if (num_dims == 3
|
|
&& ((src_devicep
|
|
&& src_devicep == dst_devicep
|
|
&& src_devicep->memcpy3d_func)
|
|
|| (!src_devicep != !dst_devicep
|
|
&& ((src_devicep && src_devicep->memcpy3d_func)
|
|
|| (dst_devicep && dst_devicep->memcpy3d_func)))))
|
|
{
|
|
size_t vol_sz2, dst_sz2, src_sz2, dst_off_sz2, src_off_sz2;
|
|
int dst_id = dst_devicep ? dst_devicep->target_id : -1;
|
|
int src_id = src_devicep ? src_devicep->target_id : -1;
|
|
struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
|
|
|
|
if (__builtin_mul_overflow (volume[2], element_size, &vol_sz2)
|
|
|| __builtin_mul_overflow (dst_dimensions[2], element_size, &dst_sz2)
|
|
|| __builtin_mul_overflow (src_dimensions[2], element_size, &src_sz2)
|
|
|| __builtin_mul_overflow (dst_offsets[2], element_size, &dst_off_sz2)
|
|
|| __builtin_mul_overflow (src_offsets[2], element_size,
|
|
&src_off_sz2))
|
|
return EINVAL;
|
|
ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], volume[0],
|
|
dst, dst_off_sz2, dst_offsets[1],
|
|
dst_offsets[0], dst_sz2, dst_dimensions[1],
|
|
src, src_off_sz2, src_offsets[1],
|
|
src_offsets[0], src_sz2, src_dimensions[1]);
|
|
if (ret != -1)
|
|
return ret ? 0 : EINVAL;
|
|
}
|
|
|
|
for (i = 1; i < num_dims; i++)
|
|
if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
|
|
|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
|
|
return EINVAL;
|
|
if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
|
|
|| __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
|
|
return EINVAL;
|
|
for (j = 0; j < volume[0]; j++)
|
|
{
|
|
ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
|
|
(const char *) src + src_off,
|
|
element_size, num_dims - 1,
|
|
volume + 1, dst_offsets + 1,
|
|
src_offsets + 1, dst_dimensions + 1,
|
|
src_dimensions + 1, dst_devicep,
|
|
src_devicep, tmp_size, tmp);
|
|
if (ret)
|
|
return ret;
|
|
dst_off += dst_slice;
|
|
src_off += src_slice;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int
omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
			      int src_device_num,
			      struct gomp_device_descr **dst_devicep,
			      struct gomp_device_descr **src_devicep)
{
  if (!dst && !src)
    return INT_MAX;

  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     dst_devicep, src_devicep);
  if (ret)
    return ret;

  return 0;
}

static int
omp_target_memcpy_rect_copy (void *dst, const void *src,
			     size_t element_size, int num_dims,
			     const size_t *volume, const size_t *dst_offsets,
			     const size_t *src_offsets,
			     const size_t *dst_dimensions,
			     const size_t *src_dimensions,
			     struct gomp_device_descr *dst_devicep,
			     struct gomp_device_descr *src_devicep)
{
  size_t tmp_size = 0;
  void *tmp = NULL;
  bool lock_src;
  bool lock_dst;

  lock_src = src_devicep != NULL;
  lock_dst = dst_devicep != NULL && src_devicep != dst_devicep;
  if (lock_src)
    gomp_mutex_lock (&src_devicep->lock);
  if (lock_dst)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep,
					   &tmp_size, &tmp);
  if (lock_src)
    gomp_mutex_unlock (&src_devicep->lock);
  if (lock_dst)
    gomp_mutex_unlock (&dst_devicep->lock);
  if (tmp)
    free (tmp);

  return ret;
}

int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
					    src_device_num, &dst_devicep,
					    &src_devicep);

  if (check)
    return check;

  int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
					 volume, dst_offsets, src_offsets,
					 dst_dimensions, src_dimensions,
					 dst_devicep, src_devicep);

  return ret;
}

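/* Illustrative usage sketch (not part of libgomp): copy a 2x3 sub-block of
   a 4x5 host matrix into the top-left corner of a same-shaped device
   buffer.  Both dimension arrays describe the full matrices; the offsets
   select the sub-block origin.  'dev' is assumed to be a valid non-host
   device number.

     double h[4][5];
     void *d = omp_target_alloc (sizeof h, dev);
     size_t volume[2] = { 2, 3 };
     size_t h_off[2]  = { 1, 2 };     // source block starts at h[1][2]
     size_t d_off[2]  = { 0, 0 };
     size_t h_dims[2] = { 4, 5 };
     size_t d_dims[2] = { 4, 5 };
     omp_target_memcpy_rect (d, h, sizeof (double), 2, volume,
			     d_off, h_off, d_dims, h_dims,
			     dev, omp_get_initial_device ());
*/
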
typedef struct
|
|
{
|
|
void *dst;
|
|
const void *src;
|
|
size_t element_size;
|
|
const size_t *volume;
|
|
const size_t *dst_offsets;
|
|
const size_t *src_offsets;
|
|
const size_t *dst_dimensions;
|
|
const size_t *src_dimensions;
|
|
struct gomp_device_descr *dst_devicep;
|
|
struct gomp_device_descr *src_devicep;
|
|
int num_dims;
|
|
} omp_target_memcpy_rect_data;
|
|
|
|
static void
|
|
omp_target_memcpy_rect_async_helper (void *args)
|
|
{
|
|
omp_target_memcpy_rect_data *a = args;
|
|
int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
|
|
a->num_dims, a->volume, a->dst_offsets,
|
|
a->src_offsets, a->dst_dimensions,
|
|
a->src_dimensions, a->dst_devicep,
|
|
a->src_devicep);
|
|
if (ret)
|
|
gomp_fatal ("omp_target_memcpy_rect failed");
|
|
}
|
|
|
|
int
|
|
omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
|
|
int num_dims, const size_t *volume,
|
|
const size_t *dst_offsets,
|
|
const size_t *src_offsets,
|
|
const size_t *dst_dimensions,
|
|
const size_t *src_dimensions,
|
|
int dst_device_num, int src_device_num,
|
|
int depobj_count, omp_depend_t *depobj_list)
|
|
{
|
|
struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
|
|
unsigned flags = 0;
|
|
int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
|
|
src_device_num, &dst_devicep,
|
|
&src_devicep);
|
|
void *depend[depobj_count + 5];
|
|
int i;
|
|
|
|
omp_target_memcpy_rect_data s = {
|
|
.dst = dst,
|
|
.src = src,
|
|
.element_size = element_size,
|
|
.num_dims = num_dims,
|
|
.volume = volume,
|
|
.dst_offsets = dst_offsets,
|
|
.src_offsets = src_offsets,
|
|
.dst_dimensions = dst_dimensions,
|
|
.src_dimensions = src_dimensions,
|
|
.dst_devicep = dst_devicep,
|
|
.src_devicep = src_devicep
|
|
};
|
|
|
|
if (check)
|
|
return check;
|
|
|
|
if (depobj_count > 0 && depobj_list != NULL)
|
|
{
|
|
flags |= GOMP_TASK_FLAG_DEPEND;
|
|
depend[0] = 0;
|
|
depend[1] = (void *) (uintptr_t) depobj_count;
|
|
depend[2] = depend[3] = depend[4] = 0;
|
|
for (i = 0; i < depobj_count; ++i)
|
|
depend[i + 5] = &depobj_list[i];
|
|
}
|
|
|
|
GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
|
|
__alignof__ (s), true, flags, depend, 0, NULL);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

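/* Illustrative usage sketch (not part of libgomp): pre-associating an
   existing device allocation with a host buffer so that later map clauses
   find the data already present and reuse the associated device memory
   instead of allocating new storage.  'dev' and the buffer size are only
   examples.

     double h[100];
     int dev = omp_get_default_device ();
     void *d = omp_target_alloc (sizeof h, dev);
     omp_target_associate_ptr (h, d, sizeof h, 0, dev);
     #pragma omp target map(tofrom: h) device(dev)
     { h[0] = 1.0; }
     omp_target_disassociate_ptr (h, dev);
     omp_target_free (d, dev);
*/
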
void *
omp_get_mapped_ptr (const void *ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == omp_get_initial_device ())
    return (void *) ptr;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return (void *) ptr;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  void *ret = NULL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);

  if (n)
    {
      uintptr_t offset = cur_node.host_start - n->host_start;
      ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
    }

  gomp_mutex_unlock (&devicep->lock);

  return ret;
}

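/* Illustrative usage sketch (not part of libgomp):

     int a[64];
     #pragma omp target enter data map(to: a)
     void *dev_a = omp_get_mapped_ptr (a, omp_get_default_device ());
     // dev_a now holds the device address of the mapping created above,
     // or NULL if 'a' is not mapped on that device.
     #pragma omp target exit data map(release: a)
*/
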
int
omp_target_is_accessible (const void *ptr, size_t size, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return true;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return false;

  /* TODO: Unified shared memory must be handled when available.  */

  return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
}

int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return gomp_pause_host ();

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return -1;

  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)

#ifdef PLUGIN_SUPPORT
|
|
|
|
/* This function tries to load a plugin for DEVICE. Name of plugin is passed
|
|
in PLUGIN_NAME.
|
|
The handles of the found functions are stored in the corresponding fields
|
|
of DEVICE. The function returns TRUE on success and FALSE otherwise. */
|
|
|
|
static bool
|
|
gomp_load_plugin_for_device (struct gomp_device_descr *device,
|
|
const char *plugin_name)
|
|
{
|
|
const char *err = NULL, *last_missing = NULL;
|
|
|
|
void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
|
|
if (!plugin_handle)
|
|
#if OFFLOAD_DEFAULTED
|
|
return 0;
|
|
#else
|
|
goto dl_fail;
|
|
#endif
|
|
|
|
/* Check if all required functions are available in the plugin and store
|
|
their handlers. None of the symbols can legitimately be NULL,
|
|
so we don't need to check dlerror all the time. */
|
|
#define DLSYM(f) \
|
|
if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
|
|
goto dl_fail
|
|
/* Similar, but missing functions are not an error. Return false if
|
|
failed, true otherwise. */
|
|
#define DLSYM_OPT(f, n) \
|
|
((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
|
|
|| (last_missing = #n, 0))
|
|
|
|
DLSYM (version);
|
|
if (device->version_func () != GOMP_VERSION)
|
|
{
|
|
err = "plugin version mismatch";
|
|
goto fail;
|
|
}
|
|
|
|
DLSYM (get_name);
|
|
DLSYM (get_caps);
|
|
DLSYM (get_type);
|
|
DLSYM (get_num_devices);
|
|
DLSYM (init_device);
|
|
DLSYM (fini_device);
|
|
DLSYM (load_image);
|
|
DLSYM (unload_image);
|
|
DLSYM (alloc);
|
|
DLSYM (free);
|
|
DLSYM (dev2host);
|
|
DLSYM (host2dev);
|
|
DLSYM_OPT (memcpy2d, memcpy2d);
|
|
DLSYM_OPT (memcpy3d, memcpy3d);
|
|
device->capabilities = device->get_caps_func ();
|
|
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
|
{
|
|
DLSYM (run);
|
|
DLSYM_OPT (async_run, async_run);
|
|
DLSYM_OPT (can_run, can_run);
|
|
DLSYM (dev2dev);
|
|
}
|
|
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
|
|
{
|
|
if (!DLSYM_OPT (openacc.exec, openacc_exec)
|
|
|| !DLSYM_OPT (openacc.create_thread_data,
|
|
openacc_create_thread_data)
|
|
|| !DLSYM_OPT (openacc.destroy_thread_data,
|
|
openacc_destroy_thread_data)
|
|
|| !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
|
|
|| !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
|
|
|| !DLSYM_OPT (openacc.async.test, openacc_async_test)
|
|
|| !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
|
|
|| !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
|
|
|| !DLSYM_OPT (openacc.async.queue_callback,
|
|
openacc_async_queue_callback)
|
|
|| !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
|
|
|| !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
|
|
|| !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
|
|
|| !DLSYM_OPT (openacc.get_property, openacc_get_property))
|
|
{
|
|
/* Require all the OpenACC handlers if we have
|
|
GOMP_OFFLOAD_CAP_OPENACC_200. */
|
|
err = "plugin missing OpenACC handler function";
|
|
goto fail;
|
|
}
|
|
|
|
unsigned cuda = 0;
|
|
cuda += DLSYM_OPT (openacc.cuda.get_current_device,
|
|
openacc_cuda_get_current_device);
|
|
cuda += DLSYM_OPT (openacc.cuda.get_current_context,
|
|
openacc_cuda_get_current_context);
|
|
cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
|
|
cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
|
|
if (cuda && cuda != 4)
|
|
{
|
|
/* Make sure all the CUDA functions are there if any of them are. */
|
|
err = "plugin missing OpenACC CUDA handler function";
|
|
goto fail;
|
|
}
|
|
}
|
|
#undef DLSYM
|
|
#undef DLSYM_OPT
|
|
|
|
return 1;
|
|
|
|
dl_fail:
|
|
err = dlerror ();
|
|
fail:
|
|
gomp_error ("while loading %s: %s", plugin_name, err);
|
|
if (last_missing)
|
|
gomp_error ("missing function was %s", last_missing);
|
|
if (plugin_handle)
|
|
dlclose (plugin_handle);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}

/* This function initializes the runtime for offloading.
   It parses the list of offload plugins, and tries to load these.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors
   for the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400
   ones, followed by the others.  */

static void
|
|
gomp_target_init (void)
|
|
{
|
|
const char *prefix ="libgomp-plugin-";
|
|
const char *suffix = SONAME_SUFFIX (1);
|
|
const char *cur, *next;
|
|
char *plugin_name;
|
|
int i, new_num_devs;
|
|
int num_devs = 0, num_devs_openmp;
|
|
struct gomp_device_descr *devs = NULL;
|
|
|
|
if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
|
|
return;
|
|
|
|
cur = OFFLOAD_PLUGINS;
|
|
if (*cur)
|
|
do
|
|
{
|
|
struct gomp_device_descr current_device;
|
|
size_t prefix_len, suffix_len, cur_len;
|
|
|
|
next = strchr (cur, ',');
|
|
|
|
prefix_len = strlen (prefix);
|
|
cur_len = next ? next - cur : strlen (cur);
|
|
suffix_len = strlen (suffix);
|
|
|
|
plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
|
|
if (!plugin_name)
|
|
{
|
|
num_devs = 0;
|
|
break;
|
|
}
|
|
|
|
memcpy (plugin_name, prefix, prefix_len);
|
|
memcpy (plugin_name + prefix_len, cur, cur_len);
|
|
memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
|
|
|
|
if (gomp_load_plugin_for_device (¤t_device, plugin_name))
|
|
{
|
|
int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
|
|
new_num_devs = current_device.get_num_devices_func (omp_req);
|
|
if (gomp_debug_var > 0 && new_num_devs < 0)
|
|
{
|
|
bool found = false;
|
|
int type = current_device.get_type_func ();
|
|
for (int img = 0; img < num_offload_images; img++)
|
|
if (type == offload_images[img].type)
|
|
found = true;
|
|
if (found)
|
|
{
|
|
char buf[sizeof ("unified_address, unified_shared_memory, "
|
|
"reverse_offload")];
|
|
gomp_requires_to_name (buf, sizeof (buf), omp_req);
|
|
char *name = (char *) malloc (cur_len + 1);
|
|
memcpy (name, cur, cur_len);
|
|
name[cur_len] = '\0';
|
|
gomp_debug (1,
|
|
"%s devices present but 'omp requires %s' "
|
|
"cannot be fulfilled\n", name, buf);
|
|
free (name);
|
|
}
|
|
}
|
|
else if (new_num_devs >= 1)
|
|
{
|
|
/* Augment DEVICES and NUM_DEVICES. */
|
|
|
|
/* If USM has been requested and is supported by all devices
|
|
of this type, set the capability accordingly. */
|
|
if (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
|
|
current_device.capabilities |= GOMP_OFFLOAD_CAP_SHARED_MEM;
|
|
|
|
devs = realloc (devs, (num_devs + new_num_devs)
|
|
* sizeof (struct gomp_device_descr));
|
|
if (!devs)
|
|
{
|
|
num_devs = 0;
|
|
free (plugin_name);
|
|
break;
|
|
}
|
|
|
|
current_device.name = current_device.get_name_func ();
|
|
/* current_device.capabilities has already been set. */
|
|
current_device.type = current_device.get_type_func ();
|
|
current_device.mem_map.root = NULL;
|
|
current_device.mem_map_rev.root = NULL;
|
|
current_device.state = GOMP_DEVICE_UNINITIALIZED;
|
|
for (i = 0; i < new_num_devs; i++)
|
|
{
|
|
current_device.target_id = i;
|
|
devs[num_devs] = current_device;
|
|
gomp_mutex_init (&devs[num_devs].lock);
|
|
num_devs++;
|
|
}
|
|
}
|
|
}
|
|
|
|
free (plugin_name);
|
|
cur = next + 1;
|
|
}
|
|
while (next);
|
|
|
|
/* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
|
|
NUM_DEVICES_OPENMP. */
|
|
struct gomp_device_descr *devs_s
|
|
= malloc (num_devs * sizeof (struct gomp_device_descr));
|
|
if (!devs_s)
|
|
{
|
|
num_devs = 0;
|
|
free (devs);
|
|
devs = NULL;
|
|
}
|
|
num_devs_openmp = 0;
|
|
for (i = 0; i < num_devs; i++)
|
|
if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|
|
devs_s[num_devs_openmp++] = devs[i];
|
|
int num_devs_after_openmp = num_devs_openmp;
|
|
for (i = 0; i < num_devs; i++)
|
|
if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
|
|
devs_s[num_devs_after_openmp++] = devs[i];
|
|
free (devs);
|
|
devs = devs_s;
|
|
|
|
for (i = 0; i < num_devs; i++)
|
|
{
|
|
/* The 'devices' array can be moved (by the realloc call) until we have
|
|
found all the plugins, so registering with the OpenACC runtime (which
|
|
takes a copy of the pointer argument) must be delayed until now. */
|
|
if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
|
|
goacc_register (&devs[i]);
|
|
}
|
|
if (gomp_global_icv.default_device_var == INT_MIN)
|
|
{
|
|
/* This implies OMP_TARGET_OFFLOAD=mandatory. */
|
|
struct gomp_icv_list *none;
|
|
none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX);
|
|
gomp_global_icv.default_device_var = (num_devs_openmp
|
|
? 0 : omp_invalid_device);
|
|
none->icvs.default_device_var = gomp_global_icv.default_device_var;
|
|
}
|
|
|
|
num_devices = num_devs;
|
|
num_devices_openmp = num_devs_openmp;
|
|
devices = devs;
|
|
if (atexit (gomp_target_fini) != 0)
|
|
gomp_fatal ("atexit failed");
|
|
}
|
|
|
|
#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */