gpu_tree.c

   1 /*
   2  * Copyright 2013      Ecole Normale Superieure
   3  *
   4  * Use of this software is governed by the MIT license
   5  *
   6  * Written by Sven Verdoolaege,
   7  * Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
   8  */
   9
  10 #include <string.h>
  11
  12 #include <isl/set.h>
  13 #include <isl/union_set.h>
  14
  15 #include "gpu_tree.h"
  16
  17 /* The functions in this file are used to navigate part of a schedule tree
  18  * that is mapped to blocks.  Initially, this part consists of a linear
  19  * branch segment with a mark node with name "kernel" on the outer end
  20  * and a mark node with name "thread" on the inner end.
  21  * During the mapping to blocks, branching may be introduced, but only
  22  * one of the elements in each sequence contains the "thread" mark.
  23  * The filter of this element (and only this filter) contains
  24  * domain elements identified by the "core" argument of the functions
  25  * that move down this tree.
  26  *
  27  * Synchronization statements have a name that starts with "sync" and
  28  * a user pointer pointing to the kernel that contains the synchronization.
  29  * The functions inserting or detecting synchronizations take a ppcg_kernel
  30  * argument to be able to create or identify such statements.
  31  * They may also use two fields in this structure, the "core" field
  32  * to move around in the tree and the "n_sync" field to make sure that
  33  * each synchronization has a different name (within the kernel).
  34  */
  35
  36 /* Is "node" a mark node with an identifier called "name"?
  37  */
  38 static int is_marked(__isl_keep isl_schedule_node *node, const char *name)
  39 {
  40         isl_id *mark;
  41         int has_name;
  42
  43         if (!node)
  44                 return -1;
  45
  46         if (isl_schedule_node_get_type(node) != isl_schedule_node_mark)
  47                 return 0;
  48
  49         mark = isl_schedule_node_mark_get_id(node);
  50         if (!mark)
  51                 return -1;
  52
  53         has_name = !strcmp(isl_id_get_name(mark), name);
  54         isl_id_free(mark);
  55
  56         return has_name;
  57 }
  58
  59 /* Is "node" a mark node with an identifier called "kernel"?
  60  */
  61 int gpu_tree_node_is_kernel(__isl_keep isl_schedule_node *node)
  62 {
  63         return is_marked(node, "kernel");
  64 }
  65
  66 /* Is "node" a mark node with an identifier called "shared"?
  67  */
  68 static int node_is_shared(__isl_keep isl_schedule_node *node)
  69 {
  70         return is_marked(node, "shared");
  71 }
  72
  73 /* Is "node" a mark node with an identifier called "thread"?
  74  */
  75 static int node_is_thread(__isl_keep isl_schedule_node *node)
  76 {
  77         return is_marked(node, "thread");
  78 }
  79
  80 /* Insert a mark node with identifier "shared" in front of "node".
  81  */
  82 static __isl_give isl_schedule_node *insert_shared(
  83         __isl_take isl_schedule_node *node)
  84 {
  85         isl_ctx *ctx;
  86         isl_id *id;
  87
  88         ctx = isl_schedule_node_get_ctx(node);
  89         id = isl_id_alloc(ctx, "shared", NULL);
  90         node = isl_schedule_node_insert_mark(node, id);
  91
  92         return node;
  93 }
  94
  95 /* Insert a "shared" mark in front of the "thread" mark
  96  * provided the linear branch between "node" and the "thread" mark
  97  * does not contain such a "shared" mark already.
  98  *
  99  * As a side effect, this function checks that the subtree at "node"
 100  * actually contains a "thread" mark and that there is no branching
 101  * in between "node" and this "thread" mark.
 102  */
 103 __isl_give isl_schedule_node *gpu_tree_insert_shared_before_thread(
 104         __isl_take isl_schedule_node *node)
 105 {
 106         int depth0, depth;
 107         int any_shared = 0;
 108
 109         if (!node)
 110                 return NULL;
 111
 112         depth0 = isl_schedule_node_get_tree_depth(node);
 113
 114         for (;;) {
 115                 int is_thread;
 116                 int n;
 117
 118                 if (!any_shared) {
 119                         any_shared = node_is_shared(node);
 120                         if (any_shared < 0)
 121                                 return isl_schedule_node_free(node);
 122                 }
 123                 is_thread = node_is_thread(node);
 124                 if (is_thread < 0)
 125                         return isl_schedule_node_free(node);
 126                 if (is_thread)
 127                         break;
 128                 n = isl_schedule_node_n_children(node);
 129                 if (n == 0)
 130                         isl_die(isl_schedule_node_get_ctx(node),
 131                                 isl_error_invalid,
 132                                 "no thread marker found",
 133                                 return isl_schedule_node_free(node));
 134                 if (n > 1)
 135                         isl_die(isl_schedule_node_get_ctx(node),
 136                                 isl_error_invalid,
 137                                 "expecting single thread marker",
 138                                 return isl_schedule_node_free(node));
 139
 140                 node = isl_schedule_node_child(node, 0);
 141         }
 142
 143         if (!any_shared)
 144                 node = insert_shared(node);
 145         depth = isl_schedule_node_get_tree_depth(node);
 146         node = isl_schedule_node_ancestor(node, depth - depth0);
 147
 148         return node;
 149 }
 150
 151 /* Assuming "node" is a filter node, does it correspond to the branch
 152  * that contains the "thread" mark, i.e., does it contain any elements
 153  * in "core"?
 154  */
 155 static int node_is_core(__isl_keep isl_schedule_node *node,
 156         __isl_keep isl_union_set *core)
 157 {
 158         int disjoint;
 159         isl_union_set *filter;
 160
 161         filter = isl_schedule_node_filter_get_filter(node);
 162         disjoint = isl_union_set_is_disjoint(filter, core);
 163         isl_union_set_free(filter);
 164         if (disjoint < 0)
 165                 return -1;
 166
 167         return !disjoint;
 168 }
 169
 170 /* Move to the only child of "node" that has the "thread" mark as descendant,
 171  * where the branch containing this mark is identified by the domain elements
 172  * in "core".
 173  *
 174  * If "node" is not a sequence, then it only has one child and we move
 175  * to that single child.
 176  * Otherwise, we check each of the filters in the children, pick
 177  * the one that corresponds to "core" and return a pointer to the child
 178  * of the filter node.
 179  */
 180 static __isl_give isl_schedule_node *core_child(
 181         __isl_take isl_schedule_node *node, __isl_keep isl_union_set *core)
 182 {
 183         int i, n;
 184
 185         if (isl_schedule_node_get_type(node) != isl_schedule_node_sequence)
 186                 return isl_schedule_node_child(node, 0);
 187
 188         n = isl_schedule_node_n_children(node);
 189         for (i = 0; i < n; ++i) {
 190                 int is_core;
 191
 192                 node = isl_schedule_node_child(node, i);
 193                 is_core = node_is_core(node, core);
 194
 195                 if (is_core < 0)
 196                         return isl_schedule_node_free(node);
 197                 if (is_core)
 198                         return isl_schedule_node_child(node, 0);
 199
 200                 node = isl_schedule_node_parent(node);
 201         }
 202
 203         isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
 204                 "core child not found", return isl_schedule_node_free(node));
 205 }
 206
 207 /* Move down the branch between "kernel" and "thread" until
 208  * the "shared" mark is reached, where the branch containing the "shared"
 209  * mark is identified by the domain elements in "core".
 210  */
 211 __isl_give isl_schedule_node *gpu_tree_move_down_to_shared(
 212         __isl_take isl_schedule_node *node, __isl_keep isl_union_set *core)
 213 {
 214         int is_shared;
 215
 216         while ((is_shared = node_is_shared(node)) == 0)
 217                 node = core_child(node, core);
 218         if (is_shared < 0)
 219                 node = isl_schedule_node_free(node);
 220
 221         return node;
 222 }
 223
 224 /* Move down the branch between "kernel" and "thread" until
 225  * the "thread" mark is reached, where the branch containing the "thread"
 226  * mark is identified by the domain elements in "core".
 227  */
 228 __isl_give isl_schedule_node *gpu_tree_move_down_to_thread(
 229         __isl_take isl_schedule_node *node, __isl_keep isl_union_set *core)
 230 {
 231         int is_thread;
 232
 233         while ((is_thread = node_is_thread(node)) == 0)
 234                 node = core_child(node, core);
 235         if (is_thread < 0)
 236                 node = isl_schedule_node_free(node);
 237
 238         return node;
 239 }
 240
 241 /* Move up the tree underneath the "thread" mark until
 242  * the "thread" mark is reached.
 243  */
 244 __isl_give isl_schedule_node *gpu_tree_move_up_to_thread(
 245         __isl_take isl_schedule_node *node)
 246 {
 247         int is_thread;
 248
 249         while ((is_thread = node_is_thread(node)) == 0)
 250                 node = isl_schedule_node_parent(node);
 251         if (is_thread < 0)
 252                 node = isl_schedule_node_free(node);
 253
 254         return node;
 255 }
 256
 257 /* Move up the tree underneath the "kernel" mark until
 258  * the "kernel" mark is reached.
 259  */
 260 __isl_give isl_schedule_node *gpu_tree_move_up_to_kernel(
 261         __isl_take isl_schedule_node *node)
 262 {
 263         int is_kernel;
 264
 265         while ((is_kernel = gpu_tree_node_is_kernel(node)) == 0)
 266                 node = isl_schedule_node_parent(node);
 267         if (is_kernel < 0)
 268                 node = isl_schedule_node_free(node);
 269
 270         return node;
 271 }
 272
 273 /* Move down from the "kernel" mark (or at least a node with schedule
 274  * depth smaller than or equal to "depth") to a band node at schedule
 275  * depth "depth".  The "thread" mark is assumed to have a schedule
 276  * depth greater than or equal to "depth".  The branch containing the
 277  * "thread" mark is identified by the domain elements in "core".
 278  *
 279  * If the desired schedule depth is in the middle of band node,
 280  * then the band node is split into two pieces, the second piece
 281  * at the desired schedule depth.
 282  */
 283 __isl_give isl_schedule_node *gpu_tree_move_down_to_depth(
 284         __isl_take isl_schedule_node *node, int depth,
 285         __isl_keep isl_union_set *core)
 286 {
 287         int is_shared;
 288         int is_thread = 0;
 289
 290         while (node && isl_schedule_node_get_schedule_depth(node) < depth) {
 291                 if (isl_schedule_node_get_type(node) ==
 292                                                     isl_schedule_node_band) {
 293                         int node_depth, node_dim;
 294                         node_depth = isl_schedule_node_get_schedule_depth(node);
 295                         node_dim = isl_schedule_node_band_n_member(node);
 296                         if (node_depth + node_dim > depth)
 297                                 node = isl_schedule_node_band_split(node,
 298                                                         depth - node_depth);
 299                 }
 300                 node = core_child(node, core);
 301         }
 302         while ((is_shared = node_is_shared(node)) == 0 &&
 303             (is_thread = node_is_thread(node)) == 0 &&
 304             isl_schedule_node_get_type(node) != isl_schedule_node_band)
 305                 node = core_child(node, core);
 306         if (is_shared < 0 || is_thread < 0)
 307                 node = isl_schedule_node_free(node);
 308
 309         return node;
 310 }
 311
 312 /* Create a union set containing a single set with a tuple identifier
 313  * called "syncX" and user pointer equal to "kernel".
 314  */
 315 static __isl_give isl_union_set *create_sync_domain(struct ppcg_kernel *kernel)
 316 {
 317         isl_space *space;
 318         isl_id *id;
 319         char name[40];
 320
 321         space = isl_space_set_alloc(kernel->ctx, 0, 0);
 322         snprintf(name, sizeof(name), "sync%d", kernel->n_sync++);
 323         id = isl_id_alloc(kernel->ctx, name, kernel);
 324         space = isl_space_set_tuple_id(space, isl_dim_set, id);
 325         return isl_union_set_from_set(isl_set_universe(space));
 326 }
 327
 328 /* Is "id" the identifier of a synchronization statement inside "kernel"?
 329  * That is, does its name start with "sync" and does it point to "kernel"?
 330  */
 331 int gpu_tree_id_is_sync(__isl_keep isl_id *id, struct ppcg_kernel *kernel)
 332 {
 333         const char *name;
 334
 335         name = isl_id_get_name(id);
 336         if (!name)
 337                 return 0;
 338         else if (strncmp(name, "sync", 4))
 339                 return 0;
 340         return isl_id_get_user(id) == kernel;
 341 }
 342
 343 /* Does "domain" consist of a single set with a tuple identifier
 344  * corresponding to a synchronization for "kernel"?
 345  */
 346 static int domain_is_sync(__isl_keep isl_union_set *domain,
 347         struct ppcg_kernel *kernel)
 348 {
 349         int is_sync;
 350         isl_id *id;
 351         isl_set *set;
 352
 353         if (isl_union_set_n_set(domain) != 1)
 354                 return 0;
 355         set = isl_set_from_union_set(isl_union_set_copy(domain));
 356         id = isl_set_get_tuple_id(set);
 357         is_sync = gpu_tree_id_is_sync(id, kernel);
 358         isl_id_free(id);
 359         isl_set_free(set);
 360
 361         return is_sync;
 362 }
 363
 364 /* Does "node" point to a filter selecting a synchronization statement
 365  * for "kernel"?
 366  */
 367 static int node_is_sync_filter(__isl_keep isl_schedule_node *node,
 368         struct ppcg_kernel *kernel)
 369 {
 370         int is_sync;
 371         enum isl_schedule_node_type type;
 372         isl_union_set *domain;
 373
 374         if (!node)
 375                 return -1;
 376         type = isl_schedule_node_get_type(node);
 377         if (type != isl_schedule_node_filter)
 378                 return 0;
 379         domain = isl_schedule_node_filter_get_filter(node);
 380         is_sync = domain_is_sync(domain, kernel);
 381         isl_union_set_free(domain);
 382
 383         return is_sync;
 384 }
 385
 386 /* Is "node" part of a sequence with a previous synchronization statement
 387  * for "kernel"?
 388  * That is, is the parent of "node" a filter such that there is
 389  * a previous filter that picks out exactly such a synchronization statement?
 390  */
 391 static int has_preceding_sync(__isl_keep isl_schedule_node *node,
 392         struct ppcg_kernel *kernel)
 393 {
 394         int found = 0;
 395
 396         node = isl_schedule_node_copy(node);
 397         node = isl_schedule_node_parent(node);
 398         while (!found && isl_schedule_node_has_previous_sibling(node)) {
 399                 node = isl_schedule_node_previous_sibling(node);
 400                 if (!node)
 401                         break;
 402                 found = node_is_sync_filter(node, kernel);
 403         }
 404         if (!node)
 405                 found = -1;
 406         isl_schedule_node_free(node);
 407
 408         return found;
 409 }
 410
 411 /* Is "node" part of a sequence with a subsequent synchronization statement
 412  * for "kernel"?
 413  * That is, is the parent of "node" a filter such that there is
 414  * a subsequent filter that picks out exactly such a synchronization statement?
 415  */
 416 static int has_following_sync(__isl_keep isl_schedule_node *node,
 417         struct ppcg_kernel *kernel)
 418 {
 419         int found = 0;
 420
 421         node = isl_schedule_node_copy(node);
 422         node = isl_schedule_node_parent(node);
 423         while (!found && isl_schedule_node_has_next_sibling(node)) {
 424                 node = isl_schedule_node_next_sibling(node);
 425                 if (!node)
 426                         break;
 427                 found = node_is_sync_filter(node, kernel);
 428         }
 429         if (!node)
 430                 found = -1;
 431         isl_schedule_node_free(node);
 432
 433         return found;
 434 }
 435
 436 /* Does the subtree rooted at "node" (which is a band node) contain
 437  * any synchronization statement for "kernel" that precedes
 438  * the core computation of "kernel" (identified by the elements
 439  * in kernel->core)?
 440  */
 441 static int has_sync_before_core(__isl_keep isl_schedule_node *node,
 442         struct ppcg_kernel *kernel)
 443 {
 444         int has_sync = 0;
 445         int is_thread;
 446
 447         node = isl_schedule_node_copy(node);
 448         while ((is_thread = node_is_thread(node)) == 0) {
 449                 node = core_child(node, kernel->core);
 450                 has_sync = has_preceding_sync(node, kernel);
 451                 if (has_sync < 0 || has_sync)
 452                         break;
 453         }
 454         if (is_thread < 0 || !node)
 455                 has_sync = -1;
 456         isl_schedule_node_free(node);
 457
 458         return has_sync;
 459 }
 460
 461 /* Does the subtree rooted at "node" (which is a band node) contain
 462  * any synchronization statement for "kernel" that follows
 463  * the core computation of "kernel" (identified by the elements
 464  * in kernel->core)?
 465  */
 466 static int has_sync_after_core(__isl_keep isl_schedule_node *node,
 467         struct ppcg_kernel *kernel)
 468 {
 469         int has_sync = 0;
 470         int is_thread;
 471
 472         node = isl_schedule_node_copy(node);
 473         while ((is_thread = node_is_thread(node)) == 0) {
 474                 node = core_child(node, kernel->core);
 475                 has_sync = has_following_sync(node, kernel);
 476                 if (has_sync < 0 || has_sync)
 477                         break;
 478         }
 479         if (is_thread < 0 || !node)
 480                 has_sync = -1;
 481         isl_schedule_node_free(node);
 482
 483         return has_sync;
 484 }
 485
 486 /* Insert (or extend) an extension on top of "node" that puts
 487  * a synchronization node for "kernel" before "node".
 488  * Return a pointer to the original node in the updated schedule tree.
 489  */
 490 static __isl_give isl_schedule_node *insert_sync_before(
 491         __isl_take isl_schedule_node *node, struct ppcg_kernel *kernel)
 492 {
 493         isl_union_set *domain;
 494         isl_schedule_node *graft;
 495
 496         if (!node)
 497                 return NULL;
 498
 499         domain = create_sync_domain(kernel);
 500         graft = isl_schedule_node_from_domain(domain);
 501         node = isl_schedule_node_graft_before(node, graft);
 502
 503         return node;
 504 }
 505
 506 /* Insert (or extend) an extension on top of "node" that puts
 507  * a synchronization node for "kernel" afater "node".
 508  * Return a pointer to the original node in the updated schedule tree.
 509  */
 510 static __isl_give isl_schedule_node *insert_sync_after(
 511         __isl_take isl_schedule_node *node, struct ppcg_kernel *kernel)
 512 {
 513         isl_union_set *domain;
 514         isl_schedule_node *graft;
 515
 516         if (!node)
 517                 return NULL;
 518
 519         domain = create_sync_domain(kernel);
 520         graft = isl_schedule_node_from_domain(domain);
 521         node = isl_schedule_node_graft_after(node, graft);
 522
 523         return node;
 524 }
 525
 526 /* Insert an extension on top of "node" that puts a synchronization node
 527  * for "kernel" before "node" unless there already is
 528  * such a synchronization node.
 529  */
 530 __isl_give isl_schedule_node *gpu_tree_ensure_preceding_sync(
 531         __isl_take isl_schedule_node *node, struct ppcg_kernel *kernel)
 532 {
 533         int has_sync;
 534
 535         has_sync = has_preceding_sync(node, kernel);
 536         if (has_sync < 0)
 537                 return isl_schedule_node_free(node);
 538         if (has_sync)
 539                 return node;
 540         return insert_sync_before(node, kernel);
 541 }
 542
 543 /* Insert an extension on top of "node" that puts a synchronization node
 544  * for "kernel" after "node" unless there already is
 545  * such a synchronization node.
 546  */
 547 __isl_give isl_schedule_node *gpu_tree_ensure_following_sync(
 548         __isl_take isl_schedule_node *node, struct ppcg_kernel *kernel)
 549 {
 550         int has_sync;
 551
 552         has_sync = has_following_sync(node, kernel);
 553         if (has_sync < 0)
 554                 return isl_schedule_node_free(node);
 555         if (has_sync)
 556                 return node;
 557         return insert_sync_after(node, kernel);
 558 }
 559
 560 /* Insert an extension on top of "node" that puts a synchronization node
 561  * for "kernel" after "node" unless there already is such a sync node or
 562  * "node" itself already * contains a synchronization node following
 563  * the core computation of "kernel".
 564  */
 565 __isl_give isl_schedule_node *gpu_tree_ensure_sync_after_core(
 566         __isl_take isl_schedule_node *node, struct ppcg_kernel *kernel)
 567 {
 568         int has_sync;
 569
 570         has_sync = has_sync_after_core(node, kernel);
 571         if (has_sync < 0)
 572                 return isl_schedule_node_free(node);
 573         if (has_sync)
 574                 return node;
 575         has_sync = has_following_sync(node, kernel);
 576         if (has_sync < 0)
 577                 return isl_schedule_node_free(node);
 578         if (has_sync)
 579                 return node;
 580         return insert_sync_after(node, kernel);
 581 }
 582
 583 /* Move left in the sequence on top of "node" to a synchronization node
 584  * for "kernel".
 585  * If "node" itself contains a synchronization node preceding
 586  * the core computation of "kernel", then return "node" itself.
 587  * Otherwise, if "node" does not have a preceding synchronization node,
 588  * then create one first.
 589  */
 590 __isl_give isl_schedule_node *gpu_tree_move_left_to_sync(
 591         __isl_take isl_schedule_node *node, struct ppcg_kernel *kernel)
 592 {
 593         int has_sync;
 594         int is_sync;
 595
 596         has_sync = has_sync_before_core(node, kernel);
 597         if (has_sync < 0)
 598                 return isl_schedule_node_free(node);
 599         if (has_sync)
 600                 return node;
 601         node = gpu_tree_ensure_preceding_sync(node, kernel);
 602         node = isl_schedule_node_parent(node);
 603         while ((is_sync = node_is_sync_filter(node, kernel)) == 0)
 604                 node = isl_schedule_node_previous_sibling(node);
 605         if (is_sync < 0)
 606                 node = isl_schedule_node_free(node);
 607         node = isl_schedule_node_child(node, 0);
 608
 609         return node;
 610 }
 611
 612 /* Move right in the sequence on top of "node" to a synchronization node
 613  * for "kernel".
 614  * If "node" itself contains a synchronization node following
 615  * the core computation of "kernel", then return "node" itself.
 616  * Otherwise, if "node" does not have a following synchronization node,
 617  * then create one first.
 618  */
 619 __isl_give isl_schedule_node *gpu_tree_move_right_to_sync(
 620         __isl_take isl_schedule_node *node, struct ppcg_kernel *kernel)
 621 {
 622         int has_sync;
 623         int is_sync;
 624
 625         has_sync = has_sync_after_core(node, kernel);
 626         if (has_sync < 0)
 627                 return isl_schedule_node_free(node);
 628         if (has_sync)
 629                 return node;
 630         node = gpu_tree_ensure_following_sync(node, kernel);
 631         node = isl_schedule_node_parent(node);
 632         while ((is_sync = node_is_sync_filter(node, kernel)) == 0)
 633                 node = isl_schedule_node_next_sibling(node);
 634         if (is_sync < 0)
 635                 node = isl_schedule_node_free(node);
 636         node = isl_schedule_node_child(node, 0);
 637
 638         return node;
 639 }