Enhance further testcases to verify handling of OpenACC privatization level [PR90115]

This follows up on commit 11b8286a83
"[OpenACC privatization] Largely extend diagnostics and corresponding testsuite
coverage [PR90115]".

	PR middle-end/90115
	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/default-1.c: Enhance.
	* testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
	* testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90: Likewise.
Thomas Schwinge 2022-03-11 15:10:59 +01:00
parent 337ed336d7
commit 2e53fa7bb2
4 changed files with 266 additions and 55 deletions
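
The enhanced testcases below check the new diagnostics by attaching 'dg-line' names to the OpenACC directives and referencing them from 'dg-note' / 'dg-optimized' / 'dg-warning' / 'dg-bogus' directives; files with several constructs additionally use Tcl counter variables so the same directive name can be reused. For reference, a minimal sketch of that counted idiom, assembled from directives that appear in the diffs below (the construct shown is illustrative only, not an additional change):

    /* Initialize the counter once per file, for pre-8.5 Tcl compatibility:
       { dg-line l_dummy[variable c_compute 0] }
       { dg-message dummy {} { target iN-VAl-Id } l_dummy } */

    #pragma acc parallel /* { dg-line l_compute[incr c_compute] } */
    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
    {
    }

Each construct bumps its counter via 'dg-line l_compute[incr c_compute]', and the expected diagnostics then refer back to that source line through 'l_compute$c_compute', so one directive name serves every construct in the file.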

libgomp/testsuite/libgomp.oacc-c-c++-common/default-1.c

@@ -1,4 +1,18 @@
/* { dg-additional-options "-fopt-info-all-omp" }
{ dg-additional-options "-foffload=-fopt-info-all-omp" } */
/* { dg-additional-options "--param=openacc-privatization=noisy" }
{ dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
{ dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
/* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
passed to 'incr' may be unset, and in that case, it will be set to [...]",
so to maintain compatibility with earlier Tcl releases, we manually
initialize counter variables:
{ dg-line l_dummy[variable c_compute 0 c_loop_i 0] }
{ dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid
"WARNING: dg-line var l_dummy defined, but not used". */
#include <openacc.h>
@@ -13,10 +27,15 @@ int test_parallel ()
ary[i] = ~0;
/* val defaults to firstprivate, ary defaults to copy. */
#pragma acc parallel num_gangs (32) copy (ok) copy(ondev) /* { dg-line l_compute[incr c_compute] } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
{
ondev = acc_on_device (acc_device_not_host);
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
#pragma acc loop gang(static:1) /* { dg-line l_loop_i[incr c_loop_i] } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
for (unsigned i = 0; i < 32; i++)
{
if (val != 2)
@@ -51,10 +70,13 @@ int test_kernels ()
ary[i] = ~0;
/* val defaults to copy, ary defaults to copy. */
#pragma acc kernels copy(ondev) /* { dg-line l_compute[incr c_compute] } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */
{
ondev = acc_on_device (acc_device_not_host);
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
for (unsigned i = 0; i < 32; i++)
{
ary[i] = val;

libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c

@@ -1,6 +1,14 @@
/* Verify that a simple, explicit acc loop reduction works inside
a kernels region. */
/* { dg-additional-options "-fopt-info-all-omp" }
{ dg-additional-options "-foffload=-fopt-info-all-omp" } */
/* { dg-additional-options "--param=openacc-privatization=noisy" }
{ dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
{ dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
#include <stdlib.h>
#define N 100
@@ -10,9 +18,11 @@ main ()
{
int i, red = 0;
#pragma acc kernels /* { dg-line l_compute1 } */
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute1 } */
{
#pragma acc loop reduction (+:red) /* { dg-line l_loop_i1 } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i1 } */
for (i = 0; i < N; i++)
red++;
}

libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c

@@ -1,6 +1,22 @@
/* OpenACC parallelism dimensions clauses: num_gangs, num_workers,
vector_length. */
/* { dg-additional-options "-fopt-info-all-omp" }
{ dg-additional-options "-foffload=-fopt-info-all-omp" } */
/* { dg-additional-options "--param=openacc-privatization=noisy" }
{ dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
{ dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
/* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
passed to 'incr' may be unset, and in that case, it will be set to [...]",
so to maintain compatibility with earlier Tcl releases, we manually
initialize counter variables:
{ dg-line l_dummy[variable c_compute 0 c_loop_i 0 c_loop_j 0 c_loop_k 0] }
{ dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid
"WARNING: dg-line var l_dummy defined, but not used". */
/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
aspects of that functionality. */
@@ -11,18 +27,21 @@
#include <gomp-constants.h>
#pragma acc routine seq
inline __attribute__ ((always_inline))
static int acc_gang ()
{
return __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
}
#pragma acc routine seq
inline __attribute__ ((always_inline))
static int acc_worker ()
{
return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
}
#pragma acc routine seq
inline __attribute__ ((always_inline))
static int acc_vector ()
{
return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
@@ -39,14 +58,19 @@ int main ()
/* GR, WS, VS. */
{
#define GANGS 0
/* { dg-warning {'num_gangs' value must be positive} {} { target c } .-1 } */
int gangs_actual = GANGS;
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
copy (gangs_actual) \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
num_gangs (GANGS)
/* { dg-note {in expansion of macro 'GANGS'} {} { target c } .-1 } */
/* { dg-warning {'num_gangs' value must be positive} {} { target c++ } .-2 } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
{
/* We're actually executing with num_gangs (1). */
gangs_actual = 1;
@@ -68,18 +92,27 @@ int main ()
/* GP, WS, VS. */
{
#define GANGS 0
/* { dg-warning {'num_gangs' value must be positive} {} { target c } .-1 } */
int gangs_actual = GANGS;
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
copy (gangs_actual) \
num_gangs (GANGS)
/* { dg-note {in expansion of macro 'GANGS'} {} { target c } .-1 } */
/* { dg-warning {'num_gangs' value must be positive} {} { target c++ } .-2 } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
/* { dg-warning {region contains gang partitioned code but is not gang partitioned} {} { target *-*-* } l_compute$c_compute } */
{
/* We're actually executing with num_gangs (1). */
gangs_actual = 1;
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
gang \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
{
gangs_min = gangs_max = acc_gang ();
@@ -98,18 +131,27 @@ int main ()
/* GR, WP, VS. */
{
#define WORKERS 0
/* { dg-warning {'num_workers' value must be positive} {} { target c } .-1 } */
int workers_actual = WORKERS;
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
copy (workers_actual) \
num_workers (WORKERS)
/* { dg-note {in expansion of macro 'WORKERS'} {} { target c } .-1 } */
/* { dg-warning {'num_workers' value must be positive} {} { target c++ } .-2 } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
/* { dg-warning {region contains worker partitioned code but is not worker partitioned} {} { target *-*-* } l_compute$c_compute } */
{
/* We're actually executing with num_workers (1). */
workers_actual = 1;
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
worker \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
{
gangs_min = gangs_max = acc_gang ();
@@ -128,22 +170,34 @@ int main ()
/* GR, WS, VP. */
{
#define VECTORS 0
/* { dg-warning {'vector_length' value must be positive} {} { target c } .-1 } */
int vectors_actual = VECTORS;
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
copy (vectors_actual) \
vector_length (VECTORS)
/* { dg-note {in expansion of macro 'VECTORS'} {} { target c } .-1 } */
/* { dg-warning {'vector_length' value must be positive} {} { target c++ } .-2 } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
/* { dg-warning {region contains vector partitioned code but is not vector partitioned} {} { target *-*-* } l_compute$c_compute } */
/* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
{
/* We're actually executing with vector_length (1), just the GCC nvptx
back end enforces vector_length (32). */
if (acc_on_device (acc_device_nvidia))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
vectors_actual = 32;
else
vectors_actual = 1;
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
vector \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
{
gangs_min = gangs_max = acc_gang ();
@@ -178,12 +232,16 @@ int main ()
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
copy (gangs_actual) \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
num_gangs (gangs)
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
/* { dg-bogus {warning: region is gang partitioned but does not contain gang partitioned code} {TODO 'reduction'} { xfail *-*-* } l_compute$c_compute } */
{
if (acc_on_device (acc_device_host))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* We're actually executing with num_gangs (1). */
gangs_actual = 1;
@@ -214,15 +272,23 @@ int main ()
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
copy (gangs_actual) \
num_gangs (gangs)
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
{
if (acc_on_device (acc_device_host))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* We're actually executing with num_gangs (1). */
gangs_actual = 1;
}
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
gang \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
{
gangs_min = gangs_max = acc_gang ();
@@ -246,27 +312,40 @@ int main ()
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
copy (workers_actual) \
num_workers (WORKERS)
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
/* { dg-warning {using 'num_workers \(32\)', ignoring 2097152} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
{
if (acc_on_device (acc_device_host))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* We're actually executing with num_workers (1). */
workers_actual = 1;
}
else if (acc_on_device (acc_device_nvidia))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* The GCC nvptx back end enforces num_workers (32). */
workers_actual = 32;
}
else if (acc_on_device (acc_device_radeon))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* The GCC GCN back end is limited to num_workers (16). */
workers_actual = 16;
}
else
__builtin_abort ();
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
worker \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
{
gangs_min = gangs_max = acc_gang ();
@@ -297,27 +376,39 @@ int main ()
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
copy (workers_actual) \
num_workers (workers)
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
{
if (acc_on_device (acc_device_host))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* We're actually executing with num_workers (1). */
workers_actual = 1;
}
else if (acc_on_device (acc_device_nvidia))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* We're actually executing with num_workers (32). */
/* workers_actual = 32; */
}
else if (acc_on_device (acc_device_radeon))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* The GCC GCN back end is limited to num_workers (16). */
workers_actual = 16;
}
else
__builtin_abort ();
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
worker \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
{
gangs_min = gangs_max = acc_gang ();
@@ -341,27 +432,40 @@ int main ()
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
copy (vectors_actual) \
vector_length (VECTORS)
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
/* { dg-warning {using 'vector_length \(1024\)', ignoring 2097152} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
{
if (acc_on_device (acc_device_host))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* We're actually executing with vector_length (1). */
vectors_actual = 1;
}
else if (acc_on_device (acc_device_nvidia))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* The GCC nvptx back end reduces to vector_length (1024). */
vectors_actual = 1024;
}
else if (acc_on_device (acc_device_radeon))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* The GCC GCN back end enforces vector_length (1): autovectorize. */
vectors_actual = 1;
}
else
__builtin_abort ();
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
vector \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
{
gangs_min = gangs_max = acc_gang ();
@@ -386,20 +490,29 @@ int main ()
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
copy (vectors_actual) \
vector_length (vectors)
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
/* { dg-warning {using 'vector_length \(32\)', ignoring runtime setting} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
{
if (acc_on_device (acc_device_host))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* We're actually executing with vector_length (1). */
vectors_actual = 1;
}
else if (acc_on_device (acc_device_nvidia))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* The GCC nvptx back end enforces vector_length (32). */
vectors_actual = 32;
}
else if (acc_on_device (acc_device_radeon))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* Because of the way vectors are implemented for GCN, a vector loop
containing a seq routine call will not vectorize calls to that
@@ -408,7 +521,11 @@ int main ()
}
else
__builtin_abort ();
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
vector \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
{
gangs_min = gangs_max = acc_gang ();
@@ -443,12 +560,17 @@ int main ()
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
copy (gangs_actual, workers_actual, vectors_actual) \
num_gangs (gangs) \
num_workers (WORKERS) \
vector_length (VECTORS)
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
/* { dg-warning {using 'vector_length \(32\)', ignoring 11} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
{
if (acc_on_device (acc_device_host))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* We're actually executing with num_gangs (1), num_workers (1),
vector_length (1). */
@@ -457,22 +579,40 @@ int main ()
vectors_actual = 1;
}
else if (acc_on_device (acc_device_nvidia))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* The GCC nvptx back end enforces vector_length (32). */
vectors_actual = 32;
}
else if (acc_on_device (acc_device_radeon))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* See above comments about GCN vectors_actual. */
vectors_actual = 1;
}
else
__builtin_abort ();
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
gang \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
#pragma acc loop /* { dg-line l_loop_j[incr c_loop_j] } */ \
worker \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
/* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
/* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_j$c_loop_j } */
for (int j = 100 * workers_actual; j > -100 * workers_actual; --j)
#pragma acc loop /* { dg-line l_loop_k[incr c_loop_k] } */ \
vector \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_k$c_loop_k } */
/* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_k$c_loop_k } */
for (int k = 100 * vectors_actual; k > -100 * vectors_actual; --k)
{
gangs_min = gangs_max = acc_gang ();
@@ -502,12 +642,16 @@ int main ()
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */
{
/* This is to make the OpenACC kernels construct unparallelizable. */
asm volatile ("" : : : "memory");
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 100; i > -100; --i)
{
gangs_min = gangs_max = acc_gang ();
@@ -532,15 +676,19 @@ int main ()
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
num_gangs (gangs) \
num_workers (WORKERS) \
vector_length (VECTORS)
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */
{
/* This is to make the OpenACC kernels construct unparallelizable. */
asm volatile ("" : : : "memory");
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 100; i > -100; --i)
{
gangs_min = gangs_max = acc_gang ();
@@ -564,8 +712,10 @@ int main ()
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc serial /* { dg-line l_compute[incr c_compute] } */ \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
/* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
{
for (int i = 100; i > -100; i--)
{
@@ -586,13 +736,18 @@ int main ()
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc serial /* { dg-line l_compute[incr c_compute] } */ \
copy (vectors_actual) \
copy (gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max)
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
/* { dg-bogus {warning: region contains gang partitioned code but is not gang partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute }
{ dg-bogus {warning: region contains worker partitioned code but is not worker partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute }
{ dg-bogus {warning: region contains vector partitioned code but is not vector partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute } */
/* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
{
if (acc_on_device (acc_device_nvidia))
/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper. */
{
/* The GCC nvptx back end enforces vector_length (32). */
/* It's unclear if that's actually permissible here;
@@ -600,11 +755,25 @@ int main ()
'serial' construct might not actually be serial". */
vectors_actual = 32;
}
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
gang \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 100; i > -100; i--)
#pragma acc loop /* { dg-line l_loop_j[incr c_loop_j] } */ \
worker \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
/* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
/* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_j$c_loop_j } */
for (int j = 100; j > -100; j--)
#pragma acc loop /* { dg-line l_loop_k[incr c_loop_k] } */ \
vector \
reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
/* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_k$c_loop_k } */
/* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_k$c_loop_k } */
for (int k = 100 * vectors_actual; k > -100 * vectors_actual; k--)
{
gangs_min = gangs_max = acc_gang ();

libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90

@@ -2,14 +2,24 @@
! { dg-do run }
! { dg-additional-options "-fopt-info-all-omp" }
! { dg-additional-options "-foffload=-fopt-info-all-omp" } */
! { dg-additional-options "--param=openacc-privatization=noisy" }
! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
! Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
! { dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
program reduction
integer, parameter :: n = 20
integer :: i, red
red = 0
!$acc kernels ! { dg-line l_compute1 } */
! { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute1 }
!$acc loop reduction (+:red) ! { dg-line l_loop_i1 }
! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i1 }
do i = 1, n
red = red + 1
end do