mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-11-24 11:24:05 +08:00
libgomp: Fix hang when profiling OpenACC programs with CUDA 9.0 nvprof
The version of nvprof in CUDA 9.0 causes a hang when used to profile an OpenACC program. This is because it calls acc_get_device_type from a callback called during device initialization, which then attempts to acquire acc_device_lock while it is already taken, resulting in deadlock. This works around the issue by returning acc_device_none from acc_get_device_type without attempting to acquire the lock when initialization has not completed yet. 2020-07-14 Tom de Vries <tom@codesourcery.com> Cesar Philippidis <cesar@codesourcery.com> Thomas Schwinge <thomas@codesourcery.com> Kwok Cheung Yeung <kcy@codesourcery.com> libgomp/ * oacc-init.c (acc_init_state_lock, acc_init_state, acc_init_thread): New variable. (acc_init_1): Set acc_init_thread to pthread_self (). Set acc_init_state to initializing at the start, and to initialized at the end. (self_initializing_p): New function. (acc_get_device_type): Return acc_device_none if called by thread that is currently executing acc_init_1. * libgomp.texi (acc_get_device_type): Update documentation. (Implementation Status and Implementation-Defined Behavior): Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-2.c: New.
This commit is contained in:
parent
bae45b8be5
commit
b52643ab90
@ -1967,6 +1967,12 @@ in @var{devicetype}, to use when executing a parallel or kernels region.
|
||||
This function returns what device type will be used when executing a
|
||||
parallel or kernels region.
|
||||
|
||||
This function returns @code{acc_device_none} if
|
||||
@code{acc_get_device_type} is called from
|
||||
@code{acc_ev_device_init_start}, @code{acc_ev_device_init_end}
|
||||
callbacks of the OpenACC Profiling Interface (@ref{OpenACC Profiling
|
||||
Interface}), that is, if the device is currently being initialized.
|
||||
|
||||
@item @emph{C/C++}:
|
||||
@multitable @columnfractions .20 .80
|
||||
@item @emph{Prototype}: @tab @code{acc_device_t acc_get_device_type(void);}
|
||||
@ -3382,6 +3388,11 @@ every event that has been registered.
|
||||
|
||||
We're not yet accounting for the fact that @cite{OpenACC events may
|
||||
occur during event processing}.
|
||||
We just handle one case specially, as required by CUDA 9.0
|
||||
@command{nvprof}, that @code{acc_get_device_type}
|
||||
(@ref{acc_get_device_type}) may be called from
|
||||
@code{acc_ev_device_init_start}, @code{acc_ev_device_init_end}
|
||||
callbacks.
|
||||
|
||||
We're not yet implementing initialization via a
|
||||
@code{acc_register_library} function that is either statically linked
|
||||
|
@ -40,6 +40,11 @@
|
||||
|
||||
static gomp_mutex_t acc_device_lock;
|
||||
|
||||
/* Protects acc_init_state and acc_init_thread.  */
static gomp_mutex_t acc_init_state_lock;
|
||||
/* Progress of acc_init_1: set to 'initializing' at its start and to
   'initialized' right before it returns the base device.  */
static enum { uninitialized, initializing, initialized } acc_init_state
|
||||
= uninitialized;
|
||||
/* The thread that most recently entered acc_init_1 (recorded via
   pthread_self ()).  */
static pthread_t acc_init_thread;
|
||||
|
||||
/* A cached version of the dispatcher for the global "current" accelerator type,
|
||||
e.g. used as the default when creating new host threads. This is the
|
||||
device-type equivalent of goacc_device_num (which specifies which device to
|
||||
@ -228,6 +233,11 @@ acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs)
|
||||
static struct gomp_device_descr *
|
||||
acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit)
|
||||
{
|
||||
gomp_mutex_lock (&acc_init_state_lock);
|
||||
acc_init_state = initializing;
|
||||
acc_init_thread = pthread_self ();
|
||||
gomp_mutex_unlock (&acc_init_state_lock);
|
||||
|
||||
bool check_not_nested_p;
|
||||
if (implicit)
|
||||
{
|
||||
@ -317,6 +327,14 @@ acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit)
|
||||
&api_info);
|
||||
}
|
||||
|
||||
/* We're setting 'initialized' *after* 'goacc_profiling_dispatch', so that a
|
||||
nested 'acc_get_device_type' called from a profiling callback still sees
|
||||
'initializing', so that we don't deadlock when it then again tries to lock
|
||||
'goacc_prof_lock'. See also the discussion in 'acc_get_device_type'. */
|
||||
gomp_mutex_lock (&acc_init_state_lock);
|
||||
acc_init_state = initialized;
|
||||
gomp_mutex_unlock (&acc_init_state_lock);
|
||||
|
||||
return base_dev;
|
||||
}
|
||||
|
||||
@ -643,6 +661,17 @@ acc_set_device_type (acc_device_t d)
|
||||
|
||||
ialias (acc_set_device_type)
|
||||
|
||||
/* Return true if the calling thread is the one currently executing
   acc_init_1, that is, acc_init_state is 'initializing' and
   acc_init_thread compares equal to pthread_self ().  Both variables are
   read while holding acc_init_state_lock.  */
static bool
|
||||
self_initializing_p (void)
|
||||
{
|
||||
bool res;
|
||||
gomp_mutex_lock (&acc_init_state_lock);
|
||||
/* Only the thread recorded by acc_init_1 counts as "self".  */
res = (acc_init_state == initializing
|
||||
&& pthread_equal (acc_init_thread, pthread_self ()));
|
||||
gomp_mutex_unlock (&acc_init_state_lock);
|
||||
return res;
|
||||
}
|
||||
|
||||
acc_device_t
|
||||
acc_get_device_type (void)
|
||||
{
|
||||
@ -652,6 +681,15 @@ acc_get_device_type (void)
|
||||
|
||||
if (thr && thr->base_dev)
|
||||
res = acc_device_type (thr->base_dev->type);
|
||||
else if (self_initializing_p ())
|
||||
/* The CUDA libaccinj64.so version 9.0+ calls acc_get_device_type during the
|
||||
acc_ev_device_init_start event callback, which is dispatched during
|
||||
acc_init_1. Trying to lock acc_device_lock during such a call (as we do
|
||||
in the else clause below), will result in deadlock, since the lock has
|
||||
already been taken by the acc_init_1 caller. We work around this problem
|
||||
by using the acc_get_device_type property "If the device type has not yet
|
||||
been selected, the value acc_device_none may be returned". */
|
||||
;
|
||||
else
|
||||
{
|
||||
acc_prof_info prof_info;
|
||||
|
@ -0,0 +1,80 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-timeout 10 } */
|
||||
|
||||
/* Test the calling of 'acc_get_device_type' from within
|
||||
'cb_device_init_start' and 'cb_device_init_end' callbacks. This occurs
|
||||
when the CUDA 9.0 'nvprof' tool is used, and previously deadlocked. */
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <acc_prof.h>
|
||||
|
||||
/* Profiling-interface entry points, filled in by acc_register_library.  */
static acc_prof_reg reg;
|
||||
static acc_prof_reg unreg;
|
||||
static acc_prof_lookup_func lookup;
|
||||
|
||||
/* Store the register, unregister and lookup functions passed in REG_,
   UNREG_ and LOOKUP_ in the file-scope variables 'reg', 'unreg' and
   'lookup'.  In this test it is called explicitly from 'main'.  */
void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_)
|
||||
{
|
||||
reg = reg_;
|
||||
unreg = unreg_;
|
||||
lookup = lookup_;
|
||||
}
|
||||
|
||||
/* Set by 'main' before each 'acc_init'; cleared by 'cb_device_init_start'
   when it runs.  */
static bool expect_cb_device_init_start;
|
||||
/* Set by 'cb_device_init_start'; cleared by 'cb_device_init_end' when it
   runs.  */
static bool expect_cb_device_init_end;
|
||||
|
||||
/* Callback registered in 'main' for 'acc_ev_device_init_start'.  Asserts
   that the callback was expected, checks that a nested
   'acc_get_device_type' call returns 'acc_device_none', and then arms the
   expectation for 'cb_device_init_end'.  The three parameters are unused.  */
static void cb_device_init_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
|
||||
{
|
||||
assert (expect_cb_device_init_start);
|
||||
expect_cb_device_init_start = false;
|
||||
|
||||
acc_device_t acc_device_type;
|
||||
/* This is the nested call that previously deadlocked.  */
acc_device_type = acc_get_device_type ();
|
||||
assert (acc_device_type == acc_device_none);
|
||||
|
||||
expect_cb_device_init_end = true;
|
||||
}
|
||||
|
||||
/* Callback registered in 'main' for 'acc_ev_device_init_end'.  Asserts that
   'cb_device_init_start' ran first (it sets 'expect_cb_device_init_end'),
   clears that flag, and checks that a nested 'acc_get_device_type' call
   still returns 'acc_device_none'.  The three parameters are unused.  */
static void cb_device_init_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
|
||||
{
|
||||
assert (expect_cb_device_init_end);
|
||||
expect_cb_device_init_end = false;
|
||||
|
||||
acc_device_t acc_device_type;
|
||||
acc_device_type = acc_get_device_type ();
|
||||
assert (acc_device_type == acc_device_none);
|
||||
}
|
||||
|
||||
/* Test driver: register the two device-initialization callbacks, then run
   an 'acc_init' / 'acc_shutdown' cycle for 'acc_device_host' and another for
   'acc_device_default'.  After each 'acc_init', check that both callbacks
   ran and what 'acc_get_device_type' reports outside of initialization.  */
int main(void)
|
||||
{
|
||||
/* Hand the profiling entry points to 'acc_register_library' explicitly.  */
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
|
||||
|
||||
reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
|
||||
reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
|
||||
|
||||
/* First cycle: host device.  */
expect_cb_device_init_start = true;
|
||||
expect_cb_device_init_end = false;
|
||||
acc_init (acc_device_host);
|
||||
/* Both flags are false again only if the start callback and then the end
   callback have run.  */
assert (!expect_cb_device_init_start);
|
||||
assert (!expect_cb_device_init_end);
|
||||
{
|
||||
acc_device_t acc_device_type;
|
||||
acc_device_type = acc_get_device_type ();
|
||||
assert (acc_device_type == acc_device_host);
|
||||
}
|
||||
acc_shutdown (acc_device_host);
|
||||
|
||||
/* Second cycle: default device; only require that some device type other
   than 'acc_device_none' is reported afterwards.  */
expect_cb_device_init_start = true;
|
||||
expect_cb_device_init_end = false;
|
||||
acc_init (acc_device_default);
|
||||
assert (!expect_cb_device_init_start);
|
||||
assert (!expect_cb_device_init_end);
|
||||
{
|
||||
acc_device_t acc_device_type;
|
||||
acc_device_type = acc_get_device_type ();
|
||||
assert (acc_device_type != acc_device_none);
|
||||
}
|
||||
acc_shutdown (acc_device_default);
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user