mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-12-13 22:03:43 +08:00
Enhance further testcases to verify handling of OpenACC privatization level [PR90115]
This follows up on the changes originally introduced in commit 11b8286a83
"[OpenACC privatization] Largely extend diagnostics and corresponding testsuite
coverage [PR90115]".
PR middle-end/90115
libgomp/
* testsuite/libgomp.oacc-c-c++-common/default-1.c: Enhance.
* testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
* testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90: Likewise.
This commit is contained in:
parent
337ed336d7
commit
2e53fa7bb2
@ -1,4 +1,18 @@
|
|||||||
/* { dg-do run } */
|
/* { dg-additional-options "-fopt-info-all-omp" }
|
||||||
|
{ dg-additional-options "-foffload=-fopt-info-all-omp" } */
|
||||||
|
|
||||||
|
/* { dg-additional-options "--param=openacc-privatization=noisy" }
|
||||||
|
{ dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
|
||||||
|
Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
|
||||||
|
{ dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
|
||||||
|
|
||||||
|
/* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
|
||||||
|
passed to 'incr' may be unset, and in that case, it will be set to [...]",
|
||||||
|
so to maintain compatibility with earlier Tcl releases, we manually
|
||||||
|
initialize counter variables:
|
||||||
|
{ dg-line l_dummy[variable c_compute 0 c_loop_i 0] }
|
||||||
|
{ dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid
|
||||||
|
"WARNING: dg-line var l_dummy defined, but not used". */
|
||||||
|
|
||||||
#include <openacc.h>
|
#include <openacc.h>
|
||||||
|
|
||||||
@ -13,10 +27,15 @@ int test_parallel ()
|
|||||||
ary[i] = ~0;
|
ary[i] = ~0;
|
||||||
|
|
||||||
/* val defaults to firstprivate, ary defaults to copy. */
|
/* val defaults to firstprivate, ary defaults to copy. */
|
||||||
#pragma acc parallel num_gangs (32) copy (ok) copy(ondev)
|
#pragma acc parallel num_gangs (32) copy (ok) copy(ondev) /* { dg-line l_compute[incr c_compute] } */
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
ondev = acc_on_device (acc_device_not_host);
|
ondev = acc_on_device (acc_device_not_host);
|
||||||
#pragma acc loop gang(static:1)
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
|
#pragma acc loop gang(static:1) /* { dg-line l_loop_i[incr c_loop_i] } */
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
|
/* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
for (unsigned i = 0; i < 32; i++)
|
for (unsigned i = 0; i < 32; i++)
|
||||||
{
|
{
|
||||||
if (val != 2)
|
if (val != 2)
|
||||||
@ -51,10 +70,13 @@ int test_kernels ()
|
|||||||
ary[i] = ~0;
|
ary[i] = ~0;
|
||||||
|
|
||||||
/* val defaults to copy, ary defaults to copy. */
|
/* val defaults to copy, ary defaults to copy. */
|
||||||
#pragma acc kernels copy(ondev)
|
#pragma acc kernels copy(ondev) /* { dg-line l_compute[incr c_compute] } */
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
ondev = acc_on_device (acc_device_not_host);
|
ondev = acc_on_device (acc_device_not_host);
|
||||||
#pragma acc loop
|
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
for (unsigned i = 0; i < 32; i++)
|
for (unsigned i = 0; i < 32; i++)
|
||||||
{
|
{
|
||||||
ary[i] = val;
|
ary[i] = val;
|
||||||
|
@ -1,6 +1,14 @@
|
|||||||
/* Verify that a simple, explicit acc loop reduction works inside
|
/* Verify that a simple, explicit acc loop reduction works inside
|
||||||
a kernels region. */
|
a kernels region. */
|
||||||
|
|
||||||
|
/* { dg-additional-options "-fopt-info-all-omp" }
|
||||||
|
{ dg-additional-options "-foffload=-fopt-info-all-omp" } */
|
||||||
|
|
||||||
|
/* { dg-additional-options "--param=openacc-privatization=noisy" }
|
||||||
|
{ dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
|
||||||
|
Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
|
||||||
|
{ dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#define N 100
|
#define N 100
|
||||||
@ -10,9 +18,11 @@ main ()
|
|||||||
{
|
{
|
||||||
int i, red = 0;
|
int i, red = 0;
|
||||||
|
|
||||||
#pragma acc kernels
|
#pragma acc kernels /* { dg-line l_compute1 } */
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute1 } */
|
||||||
{
|
{
|
||||||
#pragma acc loop reduction (+:red)
|
#pragma acc loop reduction (+:red) /* { dg-line l_loop_i1 } */
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i1 } */
|
||||||
for (i = 0; i < N; i++)
|
for (i = 0; i < N; i++)
|
||||||
red++;
|
red++;
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,22 @@
|
|||||||
/* OpenACC parallelism dimensions clauses: num_gangs, num_workers,
|
/* OpenACC parallelism dimensions clauses: num_gangs, num_workers,
|
||||||
vector_length. */
|
vector_length. */
|
||||||
|
|
||||||
|
/* { dg-additional-options "-fopt-info-all-omp" }
|
||||||
|
{ dg-additional-options "-foffload=-fopt-info-all-omp" } */
|
||||||
|
|
||||||
|
/* { dg-additional-options "--param=openacc-privatization=noisy" }
|
||||||
|
{ dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
|
||||||
|
Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
|
||||||
|
{ dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
|
||||||
|
|
||||||
|
/* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
|
||||||
|
passed to 'incr' may be unset, and in that case, it will be set to [...]",
|
||||||
|
so to maintain compatibility with earlier Tcl releases, we manually
|
||||||
|
initialize counter variables:
|
||||||
|
{ dg-line l_dummy[variable c_compute 0 c_loop_i 0 c_loop_j 0 c_loop_k 0] }
|
||||||
|
{ dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid
|
||||||
|
"WARNING: dg-line var l_dummy defined, but not used". */
|
||||||
|
|
||||||
/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
|
/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
|
||||||
aspects of that functionality. */
|
aspects of that functionality. */
|
||||||
|
|
||||||
@ -11,18 +27,21 @@
|
|||||||
#include <gomp-constants.h>
|
#include <gomp-constants.h>
|
||||||
|
|
||||||
#pragma acc routine seq
|
#pragma acc routine seq
|
||||||
|
inline __attribute__ ((always_inline))
|
||||||
static int acc_gang ()
|
static int acc_gang ()
|
||||||
{
|
{
|
||||||
return __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
|
return __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma acc routine seq
|
#pragma acc routine seq
|
||||||
|
inline __attribute__ ((always_inline))
|
||||||
static int acc_worker ()
|
static int acc_worker ()
|
||||||
{
|
{
|
||||||
return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
|
return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma acc routine seq
|
#pragma acc routine seq
|
||||||
|
inline __attribute__ ((always_inline))
|
||||||
static int acc_vector ()
|
static int acc_vector ()
|
||||||
{
|
{
|
||||||
return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
|
return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
|
||||||
@ -39,14 +58,19 @@ int main ()
|
|||||||
|
|
||||||
/* GR, WS, VS. */
|
/* GR, WS, VS. */
|
||||||
{
|
{
|
||||||
#define GANGS 0 /* { dg-warning "'num_gangs' value must be positive" "" { target c } } */
|
#define GANGS 0
|
||||||
|
/* { dg-warning {'num_gangs' value must be positive} {} { target c } .-1 } */
|
||||||
int gangs_actual = GANGS;
|
int gangs_actual = GANGS;
|
||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc parallel copy (gangs_actual) \
|
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
|
||||||
|
copy (gangs_actual) \
|
||||||
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
|
||||||
num_gangs (GANGS) /* { dg-warning "'num_gangs' value must be positive" "" { target c++ } } */
|
num_gangs (GANGS)
|
||||||
|
/* { dg-note {in expansion of macro 'GANGS'} {} { target c } .-1 } */
|
||||||
|
/* { dg-warning {'num_gangs' value must be positive} {} { target c++ } .-2 } */
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
/* We're actually executing with num_gangs (1). */
|
/* We're actually executing with num_gangs (1). */
|
||||||
gangs_actual = 1;
|
gangs_actual = 1;
|
||||||
@ -68,18 +92,27 @@ int main ()
|
|||||||
|
|
||||||
/* GP, WS, VS. */
|
/* GP, WS, VS. */
|
||||||
{
|
{
|
||||||
#define GANGS 0 /* { dg-warning "'num_gangs' value must be positive" "" { target c } } */
|
#define GANGS 0
|
||||||
|
/* { dg-warning {'num_gangs' value must be positive} {} { target c } .-1 } */
|
||||||
int gangs_actual = GANGS;
|
int gangs_actual = GANGS;
|
||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc parallel copy (gangs_actual) \
|
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
|
||||||
num_gangs (GANGS) /* { dg-warning "'num_gangs' value must be positive" "" { target c++ } } */
|
copy (gangs_actual) \
|
||||||
/* { dg-warning "region contains gang partitioned code but is not gang partitioned" "" { target *-*-* } .-2 } */
|
num_gangs (GANGS)
|
||||||
|
/* { dg-note {in expansion of macro 'GANGS'} {} { target c } .-1 } */
|
||||||
|
/* { dg-warning {'num_gangs' value must be positive} {} { target c++ } .-2 } */
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
|
/* { dg-warning {region contains gang partitioned code but is not gang partitioned} {} { target *-*-* } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
/* We're actually executing with num_gangs (1). */
|
/* We're actually executing with num_gangs (1). */
|
||||||
gangs_actual = 1;
|
gangs_actual = 1;
|
||||||
#pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
|
||||||
|
gang \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
|
/* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
|
for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
|
||||||
{
|
{
|
||||||
gangs_min = gangs_max = acc_gang ();
|
gangs_min = gangs_max = acc_gang ();
|
||||||
@ -98,18 +131,27 @@ int main ()
|
|||||||
|
|
||||||
/* GR, WP, VS. */
|
/* GR, WP, VS. */
|
||||||
{
|
{
|
||||||
#define WORKERS 0 /* { dg-warning "'num_workers' value must be positive" "" { target c } } */
|
#define WORKERS 0
|
||||||
|
/* { dg-warning {'num_workers' value must be positive} {} { target c } .-1 } */
|
||||||
int workers_actual = WORKERS;
|
int workers_actual = WORKERS;
|
||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc parallel copy (workers_actual) \
|
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
|
||||||
num_workers (WORKERS) /* { dg-warning "'num_workers' value must be positive" "" { target c++ } } */
|
copy (workers_actual) \
|
||||||
/* { dg-warning "region contains worker partitioned code but is not worker partitioned" "" { target *-*-* } .-2 } */
|
num_workers (WORKERS)
|
||||||
|
/* { dg-note {in expansion of macro 'WORKERS'} {} { target c } .-1 } */
|
||||||
|
/* { dg-warning {'num_workers' value must be positive} {} { target c++ } .-2 } */
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
|
/* { dg-warning {region contains worker partitioned code but is not worker partitioned} {} { target *-*-* } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
/* We're actually executing with num_workers (1). */
|
/* We're actually executing with num_workers (1). */
|
||||||
workers_actual = 1;
|
workers_actual = 1;
|
||||||
#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
|
||||||
|
worker \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
|
/* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
|
for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
|
||||||
{
|
{
|
||||||
gangs_min = gangs_max = acc_gang ();
|
gangs_min = gangs_max = acc_gang ();
|
||||||
@ -128,22 +170,34 @@ int main ()
|
|||||||
|
|
||||||
/* GR, WS, VP. */
|
/* GR, WS, VP. */
|
||||||
{
|
{
|
||||||
#define VECTORS 0 /* { dg-warning "'vector_length' value must be positive" "" { target c } } */
|
#define VECTORS 0
|
||||||
|
/* { dg-warning {'vector_length' value must be positive} {} { target c } .-1 } */
|
||||||
int vectors_actual = VECTORS;
|
int vectors_actual = VECTORS;
|
||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc parallel copy (vectors_actual) /* { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } } */ \
|
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
|
||||||
vector_length (VECTORS) /* { dg-warning "'vector_length' value must be positive" "" { target c++ } } */
|
copy (vectors_actual) \
|
||||||
/* { dg-warning "region contains vector partitioned code but is not vector partitioned" "" { target *-*-* } .-2 } */
|
vector_length (VECTORS)
|
||||||
|
/* { dg-note {in expansion of macro 'VECTORS'} {} { target c } .-1 } */
|
||||||
|
/* { dg-warning {'vector_length' value must be positive} {} { target c++ } .-2 } */
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
|
/* { dg-warning {region contains vector partitioned code but is not vector partitioned} {} { target *-*-* } l_compute$c_compute } */
|
||||||
|
/* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
/* We're actually executing with vector_length (1), just the GCC nvptx
|
/* We're actually executing with vector_length (1), just the GCC nvptx
|
||||||
back end enforces vector_length (32). */
|
back end enforces vector_length (32). */
|
||||||
if (acc_on_device (acc_device_nvidia))
|
if (acc_on_device (acc_device_nvidia))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
vectors_actual = 32;
|
vectors_actual = 32;
|
||||||
else
|
else
|
||||||
vectors_actual = 1;
|
vectors_actual = 1;
|
||||||
#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
|
||||||
|
vector \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
|
/* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
|
for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
|
||||||
{
|
{
|
||||||
gangs_min = gangs_max = acc_gang ();
|
gangs_min = gangs_max = acc_gang ();
|
||||||
@ -178,12 +232,16 @@ int main ()
|
|||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc parallel copy (gangs_actual) \
|
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
|
||||||
|
copy (gangs_actual) \
|
||||||
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
|
||||||
num_gangs (gangs)
|
num_gangs (gangs)
|
||||||
/* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction'" { xfail *-*-* } .-3 } */
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
|
/* { dg-bogus {warning: region is gang partitioned but does not contain gang partitioned code} {TODO 'reduction'} { xfail *-*-* } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
if (acc_on_device (acc_device_host))
|
if (acc_on_device (acc_device_host))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* We're actually executing with num_gangs (1). */
|
/* We're actually executing with num_gangs (1). */
|
||||||
gangs_actual = 1;
|
gangs_actual = 1;
|
||||||
@ -214,15 +272,23 @@ int main ()
|
|||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc parallel copy (gangs_actual) \
|
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
|
||||||
|
copy (gangs_actual) \
|
||||||
num_gangs (gangs)
|
num_gangs (gangs)
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
if (acc_on_device (acc_device_host))
|
if (acc_on_device (acc_device_host))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* We're actually executing with num_gangs (1). */
|
/* We're actually executing with num_gangs (1). */
|
||||||
gangs_actual = 1;
|
gangs_actual = 1;
|
||||||
}
|
}
|
||||||
#pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
|
||||||
|
gang \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
|
/* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
|
for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
|
||||||
{
|
{
|
||||||
gangs_min = gangs_max = acc_gang ();
|
gangs_min = gangs_max = acc_gang ();
|
||||||
@ -246,27 +312,40 @@ int main ()
|
|||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc parallel copy (workers_actual) /* { dg-warning "using .num_workers \\(32\\)., ignoring 2097152" "" { target openacc_nvidia_accel_selected } } */ \
|
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
|
||||||
|
copy (workers_actual) \
|
||||||
num_workers (WORKERS)
|
num_workers (WORKERS)
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
|
/* { dg-warning {using 'num_workers \(32\)', ignoring 2097152} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
if (acc_on_device (acc_device_host))
|
if (acc_on_device (acc_device_host))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* We're actually executing with num_workers (1). */
|
/* We're actually executing with num_workers (1). */
|
||||||
workers_actual = 1;
|
workers_actual = 1;
|
||||||
}
|
}
|
||||||
else if (acc_on_device (acc_device_nvidia))
|
else if (acc_on_device (acc_device_nvidia))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* The GCC nvptx back end enforces num_workers (32). */
|
/* The GCC nvptx back end enforces num_workers (32). */
|
||||||
workers_actual = 32;
|
workers_actual = 32;
|
||||||
}
|
}
|
||||||
else if (acc_on_device (acc_device_radeon))
|
else if (acc_on_device (acc_device_radeon))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* The GCC GCN back end is limited to num_workers (16). */
|
/* The GCC GCN back end is limited to num_workers (16). */
|
||||||
workers_actual = 16;
|
workers_actual = 16;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
__builtin_abort ();
|
__builtin_abort ();
|
||||||
#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
|
||||||
|
worker \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
|
/* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
|
for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
|
||||||
{
|
{
|
||||||
gangs_min = gangs_max = acc_gang ();
|
gangs_min = gangs_max = acc_gang ();
|
||||||
@ -297,27 +376,39 @@ int main ()
|
|||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc parallel copy (workers_actual) \
|
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
|
||||||
|
copy (workers_actual) \
|
||||||
num_workers (workers)
|
num_workers (workers)
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
if (acc_on_device (acc_device_host))
|
if (acc_on_device (acc_device_host))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* We're actually executing with num_workers (1). */
|
/* We're actually executing with num_workers (1). */
|
||||||
workers_actual = 1;
|
workers_actual = 1;
|
||||||
}
|
}
|
||||||
else if (acc_on_device (acc_device_nvidia))
|
else if (acc_on_device (acc_device_nvidia))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* We're actually executing with num_workers (32). */
|
/* We're actually executing with num_workers (32). */
|
||||||
/* workers_actual = 32; */
|
/* workers_actual = 32; */
|
||||||
}
|
}
|
||||||
else if (acc_on_device (acc_device_radeon))
|
else if (acc_on_device (acc_device_radeon))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* The GCC GCN back end is limited to num_workers (16). */
|
/* The GCC GCN back end is limited to num_workers (16). */
|
||||||
workers_actual = 16;
|
workers_actual = 16;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
__builtin_abort ();
|
__builtin_abort ();
|
||||||
#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
|
||||||
|
worker \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
|
/* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
|
for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
|
||||||
{
|
{
|
||||||
gangs_min = gangs_max = acc_gang ();
|
gangs_min = gangs_max = acc_gang ();
|
||||||
@ -341,27 +432,40 @@ int main ()
|
|||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc parallel copy (vectors_actual) /* { dg-warning "using .vector_length \\(1024\\)., ignoring 2097152" "" { target openacc_nvidia_accel_selected } } */ \
|
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
|
||||||
|
copy (vectors_actual) \
|
||||||
vector_length (VECTORS)
|
vector_length (VECTORS)
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
|
/* { dg-warning {using 'vector_length \(1024\)', ignoring 2097152} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
if (acc_on_device (acc_device_host))
|
if (acc_on_device (acc_device_host))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* We're actually executing with vector_length (1). */
|
/* We're actually executing with vector_length (1). */
|
||||||
vectors_actual = 1;
|
vectors_actual = 1;
|
||||||
}
|
}
|
||||||
else if (acc_on_device (acc_device_nvidia))
|
else if (acc_on_device (acc_device_nvidia))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* The GCC nvptx back end reduces to vector_length (1024). */
|
/* The GCC nvptx back end reduces to vector_length (1024). */
|
||||||
vectors_actual = 1024;
|
vectors_actual = 1024;
|
||||||
}
|
}
|
||||||
else if (acc_on_device (acc_device_radeon))
|
else if (acc_on_device (acc_device_radeon))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* The GCC GCN back end enforces vector_length (1): autovectorize. */
|
/* The GCC GCN back end enforces vector_length (1): autovectorize. */
|
||||||
vectors_actual = 1;
|
vectors_actual = 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
__builtin_abort ();
|
__builtin_abort ();
|
||||||
#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
|
||||||
|
vector \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
|
/* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
|
for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
|
||||||
{
|
{
|
||||||
gangs_min = gangs_max = acc_gang ();
|
gangs_min = gangs_max = acc_gang ();
|
||||||
@ -386,20 +490,29 @@ int main ()
|
|||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc parallel copy (vectors_actual) /* { dg-warning "using .vector_length \\(32\\)., ignoring runtime setting" "" { target openacc_nvidia_accel_selected } } */ \
|
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
|
||||||
|
copy (vectors_actual) \
|
||||||
vector_length (vectors)
|
vector_length (vectors)
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
|
/* { dg-warning {using 'vector_length \(32\)', ignoring runtime setting} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
if (acc_on_device (acc_device_host))
|
if (acc_on_device (acc_device_host))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* We're actually executing with vector_length (1). */
|
/* We're actually executing with vector_length (1). */
|
||||||
vectors_actual = 1;
|
vectors_actual = 1;
|
||||||
}
|
}
|
||||||
else if (acc_on_device (acc_device_nvidia))
|
else if (acc_on_device (acc_device_nvidia))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* The GCC nvptx back end enforces vector_length (32). */
|
/* The GCC nvptx back end enforces vector_length (32). */
|
||||||
vectors_actual = 32;
|
vectors_actual = 32;
|
||||||
}
|
}
|
||||||
else if (acc_on_device (acc_device_radeon))
|
else if (acc_on_device (acc_device_radeon))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* Because of the way vectors are implemented for GCN, a vector loop
|
/* Because of the way vectors are implemented for GCN, a vector loop
|
||||||
containing a seq routine call will not vectorize calls to that
|
containing a seq routine call will not vectorize calls to that
|
||||||
@ -408,7 +521,11 @@ int main ()
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
__builtin_abort ();
|
__builtin_abort ();
|
||||||
#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
|
||||||
|
vector \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
|
/* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
|
for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
|
||||||
{
|
{
|
||||||
gangs_min = gangs_max = acc_gang ();
|
gangs_min = gangs_max = acc_gang ();
|
||||||
@ -443,12 +560,17 @@ int main ()
|
|||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc parallel copy (gangs_actual, workers_actual, vectors_actual) /* { dg-warning "using .vector_length \\(32\\)., ignoring 11" "" { target openacc_nvidia_accel_selected } } */ \
|
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
|
||||||
|
copy (gangs_actual, workers_actual, vectors_actual) \
|
||||||
num_gangs (gangs) \
|
num_gangs (gangs) \
|
||||||
num_workers (WORKERS) \
|
num_workers (WORKERS) \
|
||||||
vector_length (VECTORS)
|
vector_length (VECTORS)
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
|
/* { dg-warning {using 'vector_length \(32\)', ignoring 11} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
if (acc_on_device (acc_device_host))
|
if (acc_on_device (acc_device_host))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* We're actually executing with num_gangs (1), num_workers (1),
|
/* We're actually executing with num_gangs (1), num_workers (1),
|
||||||
vector_length (1). */
|
vector_length (1). */
|
||||||
@ -457,22 +579,40 @@ int main ()
|
|||||||
vectors_actual = 1;
|
vectors_actual = 1;
|
||||||
}
|
}
|
||||||
else if (acc_on_device (acc_device_nvidia))
|
else if (acc_on_device (acc_device_nvidia))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* The GCC nvptx back end enforces vector_length (32). */
|
/* The GCC nvptx back end enforces vector_length (32). */
|
||||||
vectors_actual = 32;
|
vectors_actual = 32;
|
||||||
}
|
}
|
||||||
else if (acc_on_device (acc_device_radeon))
|
else if (acc_on_device (acc_device_radeon))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* See above comments about GCN vectors_actual. */
|
/* See above comments about GCN vectors_actual. */
|
||||||
vectors_actual = 1;
|
vectors_actual = 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
__builtin_abort ();
|
__builtin_abort ();
|
||||||
#pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
|
||||||
|
gang \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
|
/* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
|
/* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
|
for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
|
||||||
#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_j[incr c_loop_j] } */ \
|
||||||
|
worker \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
|
||||||
|
/* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
|
||||||
|
/* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_j$c_loop_j } */
|
||||||
for (int j = 100 * workers_actual; j > -100 * workers_actual; --j)
|
for (int j = 100 * workers_actual; j > -100 * workers_actual; --j)
|
||||||
#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_k[incr c_loop_k] } */ \
|
||||||
|
vector \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_k$c_loop_k } */
|
||||||
|
/* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_k$c_loop_k } */
|
||||||
for (int k = 100 * vectors_actual; k > -100 * vectors_actual; --k)
|
for (int k = 100 * vectors_actual; k > -100 * vectors_actual; --k)
|
||||||
{
|
{
|
||||||
gangs_min = gangs_max = acc_gang ();
|
gangs_min = gangs_max = acc_gang ();
|
||||||
@ -502,12 +642,16 @@ int main ()
|
|||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc kernels
|
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
/* This is to make the OpenACC kernels construct unparallelizable. */
|
/* This is to make the OpenACC kernels construct unparallelizable. */
|
||||||
asm volatile ("" : : : "memory");
|
asm volatile ("" : : : "memory");
|
||||||
|
|
||||||
#pragma acc loop reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
for (int i = 100; i > -100; --i)
|
for (int i = 100; i > -100; --i)
|
||||||
{
|
{
|
||||||
gangs_min = gangs_max = acc_gang ();
|
gangs_min = gangs_max = acc_gang ();
|
||||||
@ -532,15 +676,19 @@ int main ()
|
|||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc kernels \
|
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
|
||||||
num_gangs (gangs) \
|
num_gangs (gangs) \
|
||||||
num_workers (WORKERS) \
|
num_workers (WORKERS) \
|
||||||
vector_length (VECTORS)
|
vector_length (VECTORS)
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
/* This is to make the OpenACC kernels construct unparallelizable. */
|
/* This is to make the OpenACC kernels construct unparallelizable. */
|
||||||
asm volatile ("" : : : "memory");
|
asm volatile ("" : : : "memory");
|
||||||
|
|
||||||
#pragma acc loop reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
for (int i = 100; i > -100; --i)
|
for (int i = 100; i > -100; --i)
|
||||||
{
|
{
|
||||||
gangs_min = gangs_max = acc_gang ();
|
gangs_min = gangs_max = acc_gang ();
|
||||||
@ -564,8 +712,10 @@ int main ()
|
|||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc serial /* { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } } */ \
|
#pragma acc serial /* { dg-line l_compute[incr c_compute] } */ \
|
||||||
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
|
/* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
for (int i = 100; i > -100; i--)
|
for (int i = 100; i > -100; i--)
|
||||||
{
|
{
|
||||||
@ -586,13 +736,18 @@ int main ()
|
|||||||
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
|
||||||
gangs_min = workers_min = vectors_min = INT_MAX;
|
gangs_min = workers_min = vectors_min = INT_MAX;
|
||||||
gangs_max = workers_max = vectors_max = INT_MIN;
|
gangs_max = workers_max = vectors_max = INT_MIN;
|
||||||
#pragma acc serial copy (vectors_actual) /* { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } } */ \
|
#pragma acc serial /* { dg-line l_compute[incr c_compute] } */ \
|
||||||
|
copy (vectors_actual) \
|
||||||
copy (gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max)
|
copy (gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max)
|
||||||
/* { dg-bogus "warning: region contains gang partitioned code but is not gang partitioned" "TODO 'serial'" { xfail *-*-* } .-2 }
|
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
|
||||||
{ dg-bogus "warning: region contains worker partitioned code but is not worker partitioned" "TODO 'serial'" { xfail *-*-* } .-3 }
|
/* { dg-bogus {warning: region contains gang partitioned code but is not gang partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute }
|
||||||
{ dg-bogus "warning: region contains vector partitioned code but is not vector partitioned" "TODO 'serial'" { xfail *-*-* } .-4 } */
|
{ dg-bogus {warning: region contains worker partitioned code but is not worker partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute }
|
||||||
|
{ dg-bogus {warning: region contains vector partitioned code but is not vector partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute } */
|
||||||
|
/* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
|
||||||
{
|
{
|
||||||
if (acc_on_device (acc_device_nvidia))
|
if (acc_on_device (acc_device_nvidia))
|
||||||
|
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
|
||||||
|
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
|
||||||
{
|
{
|
||||||
/* The GCC nvptx back end enforces vector_length (32). */
|
/* The GCC nvptx back end enforces vector_length (32). */
|
||||||
/* It's unclear if that's actually permissible here;
|
/* It's unclear if that's actually permissible here;
|
||||||
@ -600,11 +755,25 @@ int main ()
|
|||||||
'serial' construct might not actually be serial". */
|
'serial' construct might not actually be serial". */
|
||||||
vectors_actual = 32;
|
vectors_actual = 32;
|
||||||
}
|
}
|
||||||
#pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
|
||||||
|
gang \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
|
/* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
|
/* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
|
||||||
for (int i = 100; i > -100; i--)
|
for (int i = 100; i > -100; i--)
|
||||||
#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_j[incr c_loop_j] } */ \
|
||||||
|
worker \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
|
||||||
|
/* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
|
||||||
|
/* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_j$c_loop_j } */
|
||||||
for (int j = 100; j > -100; j--)
|
for (int j = 100; j > -100; j--)
|
||||||
#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
#pragma acc loop /* { dg-line l_loop_k[incr c_loop_k] } */ \
|
||||||
|
vector \
|
||||||
|
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
|
||||||
|
/* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_k$c_loop_k } */
|
||||||
|
/* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_k$c_loop_k } */
|
||||||
for (int k = 100 * vectors_actual; k > -100 * vectors_actual; k--)
|
for (int k = 100 * vectors_actual; k > -100 * vectors_actual; k--)
|
||||||
{
|
{
|
||||||
gangs_min = gangs_max = acc_gang ();
|
gangs_min = gangs_max = acc_gang ();
|
||||||
|
@ -2,14 +2,24 @@
|
|||||||
|
|
||||||
! { dg-do run }
|
! { dg-do run }
|
||||||
|
|
||||||
|
! { dg-additional-options "-fopt-info-all-omp" }
|
||||||
|
! { dg-additional-options "-foffload=-fopt-info-all-omp" }
|
||||||
|
|
||||||
|
! { dg-additional-options "--param=openacc-privatization=noisy" }
|
||||||
|
! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
|
||||||
|
! Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
|
||||||
|
! { dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} }
|
||||||
|
|
||||||
program reduction
|
program reduction
|
||||||
integer, parameter :: n = 20
|
integer, parameter :: n = 20
|
||||||
integer :: i, red
|
integer :: i, red
|
||||||
|
|
||||||
red = 0
|
red = 0
|
||||||
|
|
||||||
!$acc kernels
|
!$acc kernels ! { dg-line l_compute1 }
|
||||||
!$acc loop reduction (+:red)
|
! { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute1 }
|
||||||
|
!$acc loop reduction (+:red) ! { dg-line l_loop_i1 }
|
||||||
|
! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i1 }
|
||||||
do i = 1, n
|
do i = 1, n
|
||||||
red = red + 1
|
red = red + 1
|
||||||
end do
|
end do
|
||||||
|
Loading…
Reference in New Issue
Block a user