libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c

   1 /* OpenACC parallelism dimensions clauses: num_gangs, num_workers,
   2    vector_length.  */
   3
   4 #include <limits.h>
   5 #include <openacc.h>
   6
   7 /* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper
   8    not behaving as expected for -O0.  */
   9 #pragma acc routine seq
  10 static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
  11 {
  12   if (acc_on_device ((int) acc_device_host))
  13     return 0;
  14   else if (acc_on_device ((int) acc_device_nvidia))
  15     {
  16       unsigned int r;
  17       asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r));
  18       return r;
  19     }
  20   else
  21     __builtin_abort ();
  22 }
  23
  24 #pragma acc routine seq
  25 static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
  26 {
  27   if (acc_on_device ((int) acc_device_host))
  28     return 0;
  29   else if (acc_on_device ((int) acc_device_nvidia))
  30     {
  31       unsigned int r;
  32       asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r));
  33       return r;
  34     }
  35   else
  36     __builtin_abort ();
  37 }
  38
  39 #pragma acc routine seq
  40 static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
  41 {
  42   if (acc_on_device ((int) acc_device_host))
  43     return 0;
  44   else if (acc_on_device ((int) acc_device_nvidia))
  45     {
  46       unsigned int r;
  47       asm volatile ("mov.u32 %0,%%tid.x;" : "=r" (r));
  48       return r;
  49     }
  50   else
  51     __builtin_abort ();
  52 }
  53
  54
  55 int main ()
  56 {
  57   acc_init (acc_device_default);
  58
  59   /* Non-positive value.  */
  60
  61   /* GR, WS, VS.  */
  62   {
  63 #define GANGS 0 /* { dg-warning "'num_gangs' value must be positive" "" { target c } } */
  64     int gangs_actual = GANGS;
  65     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
  66     gangs_min = workers_min = vectors_min = INT_MAX;
  67     gangs_max = workers_max = vectors_max = INT_MIN;
  68 #pragma acc parallel copy (gangs_actual) \
  69   reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
  70   num_gangs (GANGS) /* { dg-warning "'num_gangs' value must be positive" "" { target c++ } } */
  71     {
  72       /* We're actually executing with num_gangs (1).  */
  73       gangs_actual = 1;
  74       for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
  75         {
  76           /* <https://gcc.gnu.org/PR80547>.  */
  77 #if 0
  78           gangs_min = gangs_max = acc_gang ();
  79           workers_min = workers_max = acc_worker ();
  80           vectors_min = vectors_max = acc_vector ();
  81 #else
  82           int gangs = acc_gang ();
  83           gangs_min = (gangs_min < gangs) ? gangs_min : gangs;
  84           gangs_max = (gangs_max > gangs) ? gangs_max : gangs;
  85           int workers = acc_worker ();
  86           workers_min = (workers_min < workers) ? workers_min : workers;
  87           workers_max = (workers_max > workers) ? workers_max : workers;
  88           int vectors = acc_vector ();
  89           vectors_min = (vectors_min < vectors) ? vectors_min : vectors;
  90           vectors_max = (vectors_max > vectors) ? vectors_max : vectors;
  91 #endif
  92         }
  93     }
  94     if (gangs_actual != 1)
  95       __builtin_abort ();
  96     if (gangs_min != 0 || gangs_max != gangs_actual - 1
  97         || workers_min != 0 || workers_max != 0
  98         || vectors_min != 0 || vectors_max != 0)
  99       __builtin_abort ();
 100 #undef GANGS
 101   }
 102
 103   /* GP, WS, VS.  */
 104   {
 105 #define GANGS 0 /* { dg-warning "'num_gangs' value must be positive" "" { target c } } */
 106     int gangs_actual = GANGS;
 107     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
 108     gangs_min = workers_min = vectors_min = INT_MAX;
 109     gangs_max = workers_max = vectors_max = INT_MIN;
 110 #pragma acc parallel copy (gangs_actual) \
 111   num_gangs (GANGS) /* { dg-warning "'num_gangs' value must be positive" "" { target c++ } } */
 112     {
 113       /* We're actually executing with num_gangs (1).  */
 114       gangs_actual = 1;
 115 #pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
 116       for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
 117         {
 118           gangs_min = gangs_max = acc_gang ();
 119           workers_min = workers_max = acc_worker ();
 120           vectors_min = vectors_max = acc_vector ();
 121         }
 122     }
 123     if (gangs_actual != 1)
 124       __builtin_abort ();
 125     if (gangs_min != 0 || gangs_max != gangs_actual - 1
 126         || workers_min != 0 || workers_max != 0
 127         || vectors_min != 0 || vectors_max != 0)
 128       __builtin_abort ();
 129 #undef GANGS
 130   }
 131
 132   /* GR, WP, VS.  */
 133   {
 134 #define WORKERS 0 /* { dg-warning "'num_workers' value must be positive" "" { target c } } */
 135     int workers_actual = WORKERS;
 136     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
 137     gangs_min = workers_min = vectors_min = INT_MAX;
 138     gangs_max = workers_max = vectors_max = INT_MIN;
 139 #pragma acc parallel copy (workers_actual) \
 140   num_workers (WORKERS) /* { dg-warning "'num_workers' value must be positive" "" { target c++ } } */
 141     {
 142       /* We're actually executing with num_workers (1).  */
 143       workers_actual = 1;
 144 #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
 145       for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
 146         {
 147           gangs_min = gangs_max = acc_gang ();
 148           workers_min = workers_max = acc_worker ();
 149           vectors_min = vectors_max = acc_vector ();
 150         }
 151     }
 152     if (workers_actual != 1)
 153       __builtin_abort ();
 154     if (gangs_min != 0 || gangs_max != 0
 155         || workers_min != 0 || workers_max != workers_actual - 1
 156         || vectors_min != 0 || vectors_max != 0)
 157       __builtin_abort ();
 158 #undef WORKERS
 159   }
 160
 161   /* GR, WS, VP.  */
 162   {
 163 #define VECTORS 0 /* { dg-warning "'vector_length' value must be positive" "" { target c } } */
 164     int vectors_actual = VECTORS;
 165     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
 166     gangs_min = workers_min = vectors_min = INT_MAX;
 167     gangs_max = workers_max = vectors_max = INT_MIN;
 168 #pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_configured } } */ \
 169   vector_length (VECTORS) /* { dg-warning "'vector_length' value must be positive" "" { target c++ } } */
 170     {
 171       /* We're actually executing with vector_length (1), just the GCC nvptx
 172          back end enforces vector_length (32).  */
 173       if (acc_on_device (acc_device_nvidia))
 174         vectors_actual = 32;
 175       else
 176         vectors_actual = 1;
 177 #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
 178       for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
 179         {
 180           gangs_min = gangs_max = acc_gang ();
 181           workers_min = workers_max = acc_worker ();
 182           vectors_min = vectors_max = acc_vector ();
 183         }
 184     }
 185     if (acc_get_device_type () == acc_device_nvidia)
 186       {
 187         if (vectors_actual != 32)
 188           __builtin_abort ();
 189       }
 190     else
 191       if (vectors_actual != 1)
 192         __builtin_abort ();
 193     if (gangs_min != 0 || gangs_max != 0
 194         || workers_min != 0 || workers_max != 0
 195         || vectors_min != 0 || vectors_max != vectors_actual - 1)
 196       __builtin_abort ();
 197 #undef VECTORS
 198   }
 199
 200
 201   /* High value.  */
 202
 203   /* GR, WS, VS.  */
 204   {
 205     /* There is no actual limit for the number of gangs, so we try with a
 206        rather high value.  */
 207     int gangs = 12345;
 208     int gangs_actual = gangs;
 209     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
 210     gangs_min = workers_min = vectors_min = INT_MAX;
 211     gangs_max = workers_max = vectors_max = INT_MIN;
 212 #pragma acc parallel copy (gangs_actual) \
 213   reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
 214   num_gangs (gangs)
 215     {
 216       if (acc_on_device (acc_device_host))
 217         {
 218           /* We're actually executing with num_gangs (1).  */
 219           gangs_actual = 1;
 220         }
 221       /* As we're executing GR not GP, don't multiply with a "gangs_actual"
 222          factor.  */
 223       for (int i = 100 /* * gangs_actual */; i > -100 /* * gangs_actual */; --i)
 224         {
 225           gangs_min = gangs_max = acc_gang ();
 226           workers_min = workers_max = acc_worker ();
 227           vectors_min = vectors_max = acc_vector ();
 228         }
 229     }
 230     if (gangs_actual < 1)
 231       __builtin_abort ();
 232     if (gangs_min != 0 || gangs_max != gangs_actual - 1
 233         || workers_min != 0 || workers_max != 0
 234         || vectors_min != 0 || vectors_max != 0)
 235       __builtin_abort ();
 236   }
 237
 238   /* GP, WS, VS.  */
 239   {
 240     /* There is no actual limit for the number of gangs, so we try with a
 241        rather high value.  */
 242     int gangs = 12345;
 243     int gangs_actual = gangs;
 244     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
 245     gangs_min = workers_min = vectors_min = INT_MAX;
 246     gangs_max = workers_max = vectors_max = INT_MIN;
 247 #pragma acc parallel copy (gangs_actual) \
 248   num_gangs (gangs)
 249     {
 250       if (acc_on_device (acc_device_host))
 251         {
 252           /* We're actually executing with num_gangs (1).  */
 253           gangs_actual = 1;
 254         }
 255 #pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
 256       for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
 257         {
 258           gangs_min = gangs_max = acc_gang ();
 259           workers_min = workers_max = acc_worker ();
 260           vectors_min = vectors_max = acc_vector ();
 261         }
 262     }
 263     if (gangs_actual < 1)
 264       __builtin_abort ();
 265     if (gangs_min != 0 || gangs_max != gangs_actual - 1
 266         || workers_min != 0 || workers_max != 0
 267         || vectors_min != 0 || vectors_max != 0)
 268       __builtin_abort ();
 269   }
 270
 271   /* GR, WP, VS.  */
 272   {
 273     /* We try with an outrageously large value. */
 274 #define WORKERS 2 << 20
 275     int workers_actual = WORKERS;
 276     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
 277     gangs_min = workers_min = vectors_min = INT_MAX;
 278     gangs_max = workers_max = vectors_max = INT_MIN;
 279 #pragma acc parallel copy (workers_actual) /* { dg-warning "using num_workers \\(32\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \
 280   num_workers (WORKERS)
 281     {
 282       if (acc_on_device (acc_device_host))
 283         {
 284           /* We're actually executing with num_workers (1).  */
 285           workers_actual = 1;
 286         }
 287       else if (acc_on_device (acc_device_nvidia))
 288         {
 289           /* The GCC nvptx back end enforces num_workers (32).  */
 290           workers_actual = 32;
 291         }
 292       else
 293         __builtin_abort ();
 294 #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
 295       for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
 296         {
 297           gangs_min = gangs_max = acc_gang ();
 298           workers_min = workers_max = acc_worker ();
 299           vectors_min = vectors_max = acc_vector ();
 300         }
 301     }
 302     if (workers_actual < 1)
 303       __builtin_abort ();
 304     if (gangs_min != 0 || gangs_max != 0
 305         || workers_min != 0 || workers_max != workers_actual - 1
 306         || vectors_min != 0 || vectors_max != 0)
 307       __builtin_abort ();
 308 #undef WORKERS
 309   }
 310
 311   /* GR, WP, VS.  */
 312   {
 313     /* We try with an outrageously large value. */
 314     int workers = 2 << 20;
 315     /* For nvptx offloading, this one will not result in "using num_workers
 316        (32), ignoring runtime setting", and will in fact try to launch with
 317        "num_workers (workers)", which will run into "libgomp: cuLaunchKernel
 318        error: invalid argument".  So, limit ourselves here.  */
 319     if (acc_get_device_type () == acc_device_nvidia)
 320       workers = 32;
 321     int workers_actual = workers;
 322     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
 323     gangs_min = workers_min = vectors_min = INT_MAX;
 324     gangs_max = workers_max = vectors_max = INT_MIN;
 325 #pragma acc parallel copy (workers_actual) \
 326   num_workers (workers)
 327     {
 328       if (acc_on_device (acc_device_host))
 329         {
 330           /* We're actually executing with num_workers (1).  */
 331           workers_actual = 1;
 332         }
 333       else if (acc_on_device (acc_device_nvidia))
 334         {
 335           /* We're actually executing with num_workers (32).  */
 336           /* workers_actual = 32; */
 337         }
 338       else
 339         __builtin_abort ();
 340 #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
 341       for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
 342         {
 343           gangs_min = gangs_max = acc_gang ();
 344           workers_min = workers_max = acc_worker ();
 345           vectors_min = vectors_max = acc_vector ();
 346         }
 347     }
 348     if (workers_actual < 1)
 349       __builtin_abort ();
 350     if (gangs_min != 0 || gangs_max != 0
 351         || workers_min != 0 || workers_max != workers_actual - 1
 352         || vectors_min != 0 || vectors_max != 0)
 353       __builtin_abort ();
 354   }
 355
 356   /* GR, WS, VP.  */
 357   {
 358     /* We try with an outrageously large value. */
 359 #define VECTORS 2 << 20
 360     int vectors_actual = VECTORS;
 361     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
 362     gangs_min = workers_min = vectors_min = INT_MAX;
 363     gangs_max = workers_max = vectors_max = INT_MIN;
 364 #pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \
 365   vector_length (VECTORS)
 366     {
 367       if (acc_on_device (acc_device_host))
 368         {
 369           /* We're actually executing with vector_length (1).  */
 370           vectors_actual = 1;
 371         }
 372       else if (acc_on_device (acc_device_nvidia))
 373         {
 374           /* The GCC nvptx back end enforces vector_length (32).  */
 375           vectors_actual = 32;
 376         }
 377       else
 378         __builtin_abort ();
 379 #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
 380       for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
 381         {
 382           gangs_min = gangs_max = acc_gang ();
 383           workers_min = workers_max = acc_worker ();
 384           vectors_min = vectors_max = acc_vector ();
 385         }
 386     }
 387     if (vectors_actual < 1)
 388       __builtin_abort ();
 389     if (gangs_min != 0 || gangs_max != 0
 390         || workers_min != 0 || workers_max != 0
 391         || vectors_min != 0 || vectors_max != vectors_actual - 1)
 392       __builtin_abort ();
 393 #undef VECTORS
 394   }
 395
 396   /* GR, WS, VP.  */
 397   {
 398     /* We try with an outrageously large value. */
 399     int vectors = 2 << 20;
 400     int vectors_actual = vectors;
 401     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
 402     gangs_min = workers_min = vectors_min = INT_MAX;
 403     gangs_max = workers_max = vectors_max = INT_MIN;
 404 #pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring runtime setting" "" { target openacc_nvidia_accel_configured } } */ \
 405   vector_length (vectors)
 406     {
 407       if (acc_on_device (acc_device_host))
 408         {
 409           /* We're actually executing with vector_length (1).  */
 410           vectors_actual = 1;
 411         }
 412       else if (acc_on_device (acc_device_nvidia))
 413         {
 414           /* The GCC nvptx back end enforces vector_length (32).  */
 415           vectors_actual = 32;
 416         }
 417       else
 418         __builtin_abort ();
 419 #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
 420       for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
 421         {
 422           gangs_min = gangs_max = acc_gang ();
 423           workers_min = workers_max = acc_worker ();
 424           vectors_min = vectors_max = acc_vector ();
 425         }
 426     }
 427     if (vectors_actual < 1)
 428       __builtin_abort ();
 429     if (gangs_min != 0 || gangs_max != 0
 430         || workers_min != 0 || workers_max != 0
 431         || vectors_min != 0 || vectors_max != vectors_actual - 1)
 432       __builtin_abort ();
 433   }
 434
 435
 436   /* Composition of GP, WP, VP.  */
 437   {
 438     int gangs = 12345;
 439     /* With nvptx offloading, multi-level reductions apparently are very slow
 440        in the following case.  So, limit ourselves here.  */
 441     if (acc_get_device_type () == acc_device_nvidia)
 442       gangs = 3;
 443     int gangs_actual = gangs;
 444 #define WORKERS 3
 445     int workers_actual = WORKERS;
 446 #define VECTORS 11
 447     int vectors_actual = VECTORS;
 448     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
 449     gangs_min = workers_min = vectors_min = INT_MAX;
 450     gangs_max = workers_max = vectors_max = INT_MIN;
 451 #pragma acc parallel copy (gangs_actual, workers_actual, vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 11" "" { target openacc_nvidia_accel_configured } } */ \
 452   num_gangs (gangs) \
 453   num_workers (WORKERS) \
 454   vector_length (VECTORS)
 455     {
 456       if (acc_on_device (acc_device_host))
 457         {
 458           /* We're actually executing with num_gangs (1), num_workers (1),
 459              vector_length (1).  */
 460           gangs_actual = 1;
 461           workers_actual = 1;
 462           vectors_actual = 1;
 463         }
 464       else if (acc_on_device (acc_device_nvidia))
 465         {
 466           /* The GCC nvptx back end enforces vector_length (32).  */
 467           vectors_actual = 32;
 468         }
 469       else
 470         __builtin_abort ();
 471 #pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
 472       for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
 473 #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
 474         for (int j = 100 * workers_actual; j > -100 * workers_actual; --j)
 475 #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
 476           for (int k = 100 * vectors_actual; k > -100 * vectors_actual; --k)
 477             {
 478               gangs_min = gangs_max = acc_gang ();
 479               workers_min = workers_max = acc_worker ();
 480               vectors_min = vectors_max = acc_vector ();
 481             }
 482     }
 483     if (gangs_min != 0 || gangs_max != gangs_actual - 1
 484         || workers_min != 0 || workers_max != workers_actual - 1
 485         || vectors_min != 0 || vectors_max != vectors_actual - 1)
 486       __builtin_abort ();
 487 #undef VECTORS
 488 #undef WORKERS
 489   }
 490
 491
 492   /* We can't test parallelized OpenACC kernels constructs in this way: use of
 493      the acc_gang, acc_worker, acc_vector functions will make the construct
 494      unparallelizable.  */
 495
 496
 497   /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
 498      kernels.  */
 499   {
 500     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
 501     gangs_min = workers_min = vectors_min = INT_MAX;
 502     gangs_max = workers_max = vectors_max = INT_MIN;
 503 #pragma acc kernels
 504     {
 505       /* This is to make the OpenACC kernels construct unparallelizable.  */
 506       asm volatile ("" : : : "memory");
 507
 508 #pragma acc loop reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
 509       for (int i = 100; i > -100; --i)
 510         {
 511           gangs_min = gangs_max = acc_gang ();
 512           workers_min = workers_max = acc_worker ();
 513           vectors_min = vectors_max = acc_vector ();
 514         }
 515     }
 516     if (gangs_min != 0 || gangs_max != 1 - 1
 517         || workers_min != 0 || workers_max != 1 - 1
 518         || vectors_min != 0 || vectors_max != 1 - 1)
 519       __builtin_abort ();
 520   }
 521
 522
 523   /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
 524      kernels even when there are explicit num_gangs, num_workers, or
 525      vector_length clauses.  */
 526   {
 527     int gangs = 5;
 528 #define WORKERS 5
 529 #define VECTORS 13
 530     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
 531     gangs_min = workers_min = vectors_min = INT_MAX;
 532     gangs_max = workers_max = vectors_max = INT_MIN;
 533 #pragma acc kernels \
 534   num_gangs (gangs) \
 535   num_workers (WORKERS) \
 536   vector_length (VECTORS)
 537     {
 538       /* This is to make the OpenACC kernels construct unparallelizable.  */
 539       asm volatile ("" : : : "memory");
 540
 541 #pragma acc loop reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
 542       for (int i = 100; i > -100; --i)
 543         {
 544           gangs_min = gangs_max = acc_gang ();
 545           workers_min = workers_max = acc_worker ();
 546           vectors_min = vectors_max = acc_vector ();
 547         }
 548     }
 549     if (gangs_min != 0 || gangs_max != 1 - 1
 550         || workers_min != 0 || workers_max != 1 - 1
 551         || vectors_min != 0 || vectors_max != 1 - 1)
 552       __builtin_abort ();
 553 #undef VECTORS
 554 #undef WORKERS
 555   }
 556
 557
 558   return 0;
 559 }