use ppcg specific macro names in generated output
[ppcg.git] / cpu.c
blob2c91dde708d2c0548c95e2634dfef1313e7d4feb
1 /*
2 * Copyright 2012 INRIA Paris-Rocquencourt
3 * Copyright 2012 Ecole Normale Superieure
5 * Use of this software is governed by the MIT license
7 * Written by Tobias Grosser, INRIA Paris-Rocquencourt,
8 * Domaine de Voluceau, Rocquenqourt, B.P. 105,
9 * 78153 Le Chesnay Cedex France
10 * and Sven Verdoolaege,
11 * Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
14 #include <limits.h>
15 #include <stdio.h>
16 #include <string.h>
18 #include <isl/aff.h>
19 #include <isl/ctx.h>
20 #include <isl/flow.h>
21 #include <isl/map.h>
22 #include <isl/ast_build.h>
23 #include <isl/schedule.h>
24 #include <isl/schedule_node.h>
25 #include <pet.h>
27 #include "ppcg.h"
28 #include "ppcg_options.h"
29 #include "cpu.h"
30 #include "print.h"
31 #include "schedule.h"
32 #include "util.h"
34 /* Representation of a statement inside a generated AST.
36 * "stmt" refers to the original statement.
37 * "ref2expr" maps the reference identifier of each access in
38 * the statement to an AST expression that should be printed
39 * at the place of the access.
41 struct ppcg_stmt {
42 struct pet_stmt *stmt;
44 isl_id_to_ast_expr *ref2expr;
47 static void ppcg_stmt_free(void *user)
49 struct ppcg_stmt *stmt = user;
50 int i;
52 if (!stmt)
53 return;
55 isl_id_to_ast_expr_free(stmt->ref2expr);
57 free(stmt);
60 /* Derive the output file name from the input file name.
61 * 'input' is the entire path of the input file. The output
62 * is the file name plus the additional extension.
64 * We will basically replace everything after the last point
65 * with '.ppcg.c'. This means file.c becomes file.ppcg.c
67 static FILE *get_output_file(const char *input, const char *output)
69 char name[PATH_MAX];
70 const char *ext;
71 const char ppcg_marker[] = ".ppcg";
72 int len;
73 FILE *file;
75 len = ppcg_extract_base_name(name, input);
77 strcpy(name + len, ppcg_marker);
78 ext = strrchr(input, '.');
79 strcpy(name + len + sizeof(ppcg_marker) - 1, ext ? ext : ".c");
81 if (!output)
82 output = name;
84 file = fopen(output, "w");
85 if (!file) {
86 fprintf(stderr, "Unable to open '%s' for writing\n", output);
87 return NULL;
90 return file;
93 /* Data used to annotate for nodes in the ast.
95 struct ast_node_userinfo {
96 /* The for node is an openmp parallel for node. */
97 int is_openmp;
100 /* Information used while building the ast.
102 struct ast_build_userinfo {
103 /* The current ppcg scop. */
104 struct ppcg_scop *scop;
106 /* Are we currently in a parallel for loop? */
107 int in_parallel_for;
110 /* Check if the current scheduling dimension is parallel.
112 * We check for parallelism by verifying that the loop does not carry any
113 * dependences.
114 * If the live_range_reordering option is set, then this currently
115 * includes the order dependences. In principle, non-zero order dependences
116 * could be allowed, but this would require privatization and/or expansion.
118 * Parallelism test: if the distance is zero in all outer dimensions, then it
119 * has to be zero in the current dimension as well.
120 * Implementation: first, translate dependences into time space, then force
121 * outer dimensions to be equal. If the distance is zero in the current
122 * dimension, then the loop is parallel.
123 * The distance is zero in the current dimension if it is a subset of a map
124 * with equal values for the current dimension.
126 static int ast_schedule_dim_is_parallel(__isl_keep isl_ast_build *build,
127 struct ppcg_scop *scop)
129 isl_union_map *schedule_node, *schedule, *deps;
130 isl_map *schedule_deps, *test;
131 isl_space *schedule_space;
132 unsigned i, dimension, is_parallel;
134 schedule = isl_ast_build_get_schedule(build);
135 schedule_space = isl_ast_build_get_schedule_space(build);
137 dimension = isl_space_dim(schedule_space, isl_dim_out) - 1;
139 deps = isl_union_map_copy(scop->dep_flow);
140 deps = isl_union_map_union(deps, isl_union_map_copy(scop->dep_false));
141 if (scop->options->live_range_reordering) {
142 isl_union_map *order = isl_union_map_copy(scop->dep_order);
143 deps = isl_union_map_union(deps, order);
145 deps = isl_union_map_apply_range(deps, isl_union_map_copy(schedule));
146 deps = isl_union_map_apply_domain(deps, schedule);
148 if (isl_union_map_is_empty(deps)) {
149 isl_union_map_free(deps);
150 isl_space_free(schedule_space);
151 return 1;
154 schedule_deps = isl_map_from_union_map(deps);
156 for (i = 0; i < dimension; i++)
157 schedule_deps = isl_map_equate(schedule_deps, isl_dim_out, i,
158 isl_dim_in, i);
160 test = isl_map_universe(isl_map_get_space(schedule_deps));
161 test = isl_map_equate(test, isl_dim_out, dimension, isl_dim_in,
162 dimension);
163 is_parallel = isl_map_is_subset(schedule_deps, test);
165 isl_space_free(schedule_space);
166 isl_map_free(test);
167 isl_map_free(schedule_deps);
169 return is_parallel;
172 /* Mark a for node openmp parallel, if it is the outermost parallel for node.
174 static void mark_openmp_parallel(__isl_keep isl_ast_build *build,
175 struct ast_build_userinfo *build_info,
176 struct ast_node_userinfo *node_info)
178 if (build_info->in_parallel_for)
179 return;
181 if (ast_schedule_dim_is_parallel(build, build_info->scop)) {
182 build_info->in_parallel_for = 1;
183 node_info->is_openmp = 1;
187 /* Allocate an ast_node_info structure and initialize it with default values.
189 static struct ast_node_userinfo *allocate_ast_node_userinfo()
191 struct ast_node_userinfo *node_info;
192 node_info = (struct ast_node_userinfo *)
193 malloc(sizeof(struct ast_node_userinfo));
194 node_info->is_openmp = 0;
195 return node_info;
198 /* Free an ast_node_info structure.
200 static void free_ast_node_userinfo(void *ptr)
202 struct ast_node_userinfo *info;
203 info = (struct ast_node_userinfo *) ptr;
204 free(info);
207 /* This method is executed before the construction of a for node. It creates
208 * an isl_id that is used to annotate the subsequently generated ast for nodes.
210 * In this function we also run the following analyses:
212 * - Detection of openmp parallel loops
214 static __isl_give isl_id *ast_build_before_for(
215 __isl_keep isl_ast_build *build, void *user)
217 isl_id *id;
218 struct ast_build_userinfo *build_info;
219 struct ast_node_userinfo *node_info;
221 build_info = (struct ast_build_userinfo *) user;
222 node_info = allocate_ast_node_userinfo();
223 id = isl_id_alloc(isl_ast_build_get_ctx(build), "", node_info);
224 id = isl_id_set_free_user(id, free_ast_node_userinfo);
226 mark_openmp_parallel(build, build_info, node_info);
228 return id;
231 /* This method is executed after the construction of a for node.
233 * It performs the following actions:
235 * - Reset the 'in_parallel_for' flag, as soon as we leave a for node,
236 * that is marked as openmp parallel.
239 static __isl_give isl_ast_node *ast_build_after_for(
240 __isl_take isl_ast_node *node, __isl_keep isl_ast_build *build,
241 void *user)
243 isl_id *id;
244 struct ast_build_userinfo *build_info;
245 struct ast_node_userinfo *info;
247 id = isl_ast_node_get_annotation(node);
248 info = isl_id_get_user(id);
250 if (info && info->is_openmp) {
251 build_info = (struct ast_build_userinfo *) user;
252 build_info->in_parallel_for = 0;
255 isl_id_free(id);
257 return node;
260 /* Find the element in scop->stmts that has the given "id".
262 static struct pet_stmt *find_stmt(struct ppcg_scop *scop, __isl_keep isl_id *id)
264 int i;
266 for (i = 0; i < scop->pet->n_stmt; ++i) {
267 struct pet_stmt *stmt = scop->pet->stmts[i];
268 isl_id *id_i;
270 id_i = isl_set_get_tuple_id(stmt->domain);
271 isl_id_free(id_i);
273 if (id_i == id)
274 return stmt;
277 isl_die(isl_id_get_ctx(id), isl_error_internal,
278 "statement not found", return NULL);
281 /* Print a user statement in the generated AST.
282 * The ppcg_stmt has been attached to the node in at_each_domain.
284 static __isl_give isl_printer *print_user(__isl_take isl_printer *p,
285 __isl_take isl_ast_print_options *print_options,
286 __isl_keep isl_ast_node *node, void *user)
288 struct ppcg_stmt *stmt;
289 isl_id *id;
291 id = isl_ast_node_get_annotation(node);
292 stmt = isl_id_get_user(id);
293 isl_id_free(id);
295 p = pet_stmt_print_body(stmt->stmt, p, stmt->ref2expr);
297 isl_ast_print_options_free(print_options);
299 return p;
303 /* Print a for loop node as an openmp parallel loop.
305 * To print an openmp parallel loop we print a normal for loop, but add
306 * "#pragma openmp parallel for" in front.
308 * Variables that are declared within the body of this for loop are
309 * automatically openmp 'private'. Iterators declared outside of the
310 * for loop are automatically openmp 'shared'. As ppcg declares all iterators
311 * at the position where they are assigned, there is no need to explicitly mark
312 * variables. Their automatically assigned type is already correct.
314 * This function only generates valid OpenMP code, if the ast was generated
315 * with the 'atomic-bounds' option enabled.
318 static __isl_give isl_printer *print_for_with_openmp(
319 __isl_keep isl_ast_node *node, __isl_take isl_printer *p,
320 __isl_take isl_ast_print_options *print_options)
322 p = isl_printer_start_line(p);
323 p = isl_printer_print_str(p, "#pragma omp parallel for");
324 p = isl_printer_end_line(p);
326 p = isl_ast_node_for_print(node, p, print_options);
328 return p;
331 /* Print a for node.
333 * Depending on how the node is annotated, we either print a normal
334 * for node or an openmp parallel for node.
336 static __isl_give isl_printer *print_for(__isl_take isl_printer *p,
337 __isl_take isl_ast_print_options *print_options,
338 __isl_keep isl_ast_node *node, void *user)
340 struct ppcg_print_info *print_info;
341 isl_id *id;
342 int openmp;
344 openmp = 0;
345 id = isl_ast_node_get_annotation(node);
347 if (id) {
348 struct ast_node_userinfo *info;
350 info = (struct ast_node_userinfo *) isl_id_get_user(id);
351 if (info && info->is_openmp)
352 openmp = 1;
355 if (openmp)
356 p = print_for_with_openmp(node, p, print_options);
357 else
358 p = isl_ast_node_for_print(node, p, print_options);
360 isl_id_free(id);
362 return p;
365 /* Index transformation callback for pet_stmt_build_ast_exprs.
367 * "index" expresses the array indices in terms of statement iterators
368 * "iterator_map" expresses the statement iterators in terms of
369 * AST loop iterators.
371 * The result expresses the array indices in terms of
372 * AST loop iterators.
374 static __isl_give isl_multi_pw_aff *pullback_index(
375 __isl_take isl_multi_pw_aff *index, __isl_keep isl_id *id, void *user)
377 isl_pw_multi_aff *iterator_map = user;
379 iterator_map = isl_pw_multi_aff_copy(iterator_map);
380 return isl_multi_pw_aff_pullback_pw_multi_aff(index, iterator_map);
383 /* Transform the accesses in the statement associated to the domain
384 * called by "node" to refer to the AST loop iterators, construct
385 * corresponding AST expressions using "build",
386 * collect them in a ppcg_stmt and annotate the node with the ppcg_stmt.
388 static __isl_give isl_ast_node *at_each_domain(__isl_take isl_ast_node *node,
389 __isl_keep isl_ast_build *build, void *user)
391 struct ppcg_scop *scop = user;
392 isl_ast_expr *expr, *arg;
393 isl_ctx *ctx;
394 isl_id *id;
395 isl_map *map;
396 isl_pw_multi_aff *iterator_map;
397 struct ppcg_stmt *stmt;
399 ctx = isl_ast_node_get_ctx(node);
400 stmt = isl_calloc_type(ctx, struct ppcg_stmt);
401 if (!stmt)
402 goto error;
404 expr = isl_ast_node_user_get_expr(node);
405 arg = isl_ast_expr_get_op_arg(expr, 0);
406 isl_ast_expr_free(expr);
407 id = isl_ast_expr_get_id(arg);
408 isl_ast_expr_free(arg);
409 stmt->stmt = find_stmt(scop, id);
410 isl_id_free(id);
411 if (!stmt->stmt)
412 goto error;
414 map = isl_map_from_union_map(isl_ast_build_get_schedule(build));
415 map = isl_map_reverse(map);
416 iterator_map = isl_pw_multi_aff_from_map(map);
417 stmt->ref2expr = pet_stmt_build_ast_exprs(stmt->stmt, build,
418 &pullback_index, iterator_map, NULL, NULL);
419 isl_pw_multi_aff_free(iterator_map);
421 id = isl_id_alloc(isl_ast_node_get_ctx(node), NULL, stmt);
422 id = isl_id_set_free_user(id, &ppcg_stmt_free);
423 return isl_ast_node_set_annotation(node, id);
424 error:
425 ppcg_stmt_free(stmt);
426 return isl_ast_node_free(node);
429 /* Set *depth (initialized to 0 by the caller) to the maximum
430 * of the schedule depths of the leaf nodes for which this function is called.
432 static isl_bool update_depth(__isl_keep isl_schedule_node *node, void *user)
434 int *depth = user;
435 int node_depth;
437 if (isl_schedule_node_get_type(node) != isl_schedule_node_leaf)
438 return isl_bool_true;
439 node_depth = isl_schedule_node_get_schedule_depth(node);
440 if (node_depth > *depth)
441 *depth = node_depth;
443 return isl_bool_false;
446 /* Code generate the scop 'scop' using "schedule"
447 * and print the corresponding C code to 'p'.
449 static __isl_give isl_printer *print_scop(struct ppcg_scop *scop,
450 __isl_take isl_schedule *schedule, __isl_take isl_printer *p,
451 struct ppcg_options *options)
453 isl_ctx *ctx = isl_printer_get_ctx(p);
454 isl_ast_build *build;
455 isl_ast_print_options *print_options;
456 isl_ast_node *tree;
457 isl_id_list *iterators;
458 struct ast_build_userinfo build_info;
459 int depth;
461 depth = 0;
462 if (isl_schedule_foreach_schedule_node_top_down(schedule, &update_depth,
463 &depth) < 0)
464 goto error;
466 build = isl_ast_build_alloc(ctx);
467 iterators = ppcg_scop_generate_names(scop, depth, "c");
468 build = isl_ast_build_set_iterators(build, iterators);
469 build = isl_ast_build_set_at_each_domain(build, &at_each_domain, scop);
471 if (options->openmp) {
472 build_info.scop = scop;
473 build_info.in_parallel_for = 0;
475 build = isl_ast_build_set_before_each_for(build,
476 &ast_build_before_for,
477 &build_info);
478 build = isl_ast_build_set_after_each_for(build,
479 &ast_build_after_for,
480 &build_info);
483 tree = isl_ast_build_node_from_schedule(build, schedule);
484 isl_ast_build_free(build);
486 print_options = isl_ast_print_options_alloc(ctx);
487 print_options = isl_ast_print_options_set_print_user(print_options,
488 &print_user, NULL);
490 print_options = isl_ast_print_options_set_print_for(print_options,
491 &print_for, NULL);
493 p = ppcg_print_macros(p, tree);
494 p = isl_ast_node_print(tree, p, print_options);
496 isl_ast_node_free(tree);
498 return p;
499 error:
500 isl_schedule_free(schedule);
501 isl_printer_free(p);
502 return NULL;
505 /* Tile the band node "node" with tile sizes "sizes" and
506 * mark all members of the resulting tile node as "atomic".
508 static __isl_give isl_schedule_node *tile(__isl_take isl_schedule_node *node,
509 __isl_take isl_multi_val *sizes)
511 node = isl_schedule_node_band_tile(node, sizes);
512 node = ppcg_set_schedule_node_type(node, isl_ast_loop_atomic);
514 return node;
517 /* Tile "node", if it is a band node with at least 2 members.
518 * The tile sizes are set from the "tile_size" option.
520 static __isl_give isl_schedule_node *tile_band(
521 __isl_take isl_schedule_node *node, void *user)
523 struct ppcg_scop *scop = user;
524 int i, n;
525 isl_space *space;
526 isl_multi_val *sizes;
528 if (isl_schedule_node_get_type(node) != isl_schedule_node_band)
529 return node;
531 n = isl_schedule_node_band_n_member(node);
532 if (n <= 1)
533 return node;
535 space = isl_schedule_node_band_get_space(node);
536 sizes = ppcg_multi_val_from_int(space, scop->options->tile_size);
538 return tile(node, sizes);
541 /* Construct schedule constraints from the dependences in ps
542 * for the purpose of computing a schedule for a CPU.
544 * The proximity constraints are set to the flow dependences.
546 * If live-range reordering is allowed then the conditional validity
547 * constraints are set to the order dependences with the flow dependences
548 * as condition. That is, a live-range (flow dependence) will be either
549 * local to an iteration of a band or all adjacent order dependences
550 * will be respected by the band.
551 * The validity constraints are set to the union of the flow dependences
552 * and the forced dependences, while the coincidence constraints
553 * are set to the union of the flow dependences, the forced dependences and
554 * the order dependences.
556 * If live-range reordering is not allowed, then both the validity
557 * and the coincidence constraints are set to the union of the flow
558 * dependences and the false dependences.
560 * Note that the coincidence constraints are only set when the "openmp"
561 * options is set. Even though the way openmp pragmas are introduced
562 * does not rely on the coincident property of the schedule band members,
563 * the coincidence constraints do affect the way the schedule is constructed,
564 * such that more schedule dimensions should be detected as parallel
565 * by ast_schedule_dim_is_parallel.
566 * Since the order dependences are also taken into account by
567 * ast_schedule_dim_is_parallel, they are also added to
568 * the coincidence constraints. If the openmp handling learns
569 * how to privatize some memory, then the corresponding order
570 * dependences can be removed from the coincidence constraints.
572 static __isl_give isl_schedule_constraints *construct_cpu_schedule_constraints(
573 struct ppcg_scop *ps)
575 isl_schedule_constraints *sc;
576 isl_union_map *validity, *coincidence;
578 sc = isl_schedule_constraints_on_domain(isl_union_set_copy(ps->domain));
579 if (ps->options->live_range_reordering) {
580 sc = isl_schedule_constraints_set_conditional_validity(sc,
581 isl_union_map_copy(ps->tagged_dep_flow),
582 isl_union_map_copy(ps->tagged_dep_order));
583 validity = isl_union_map_copy(ps->dep_flow);
584 validity = isl_union_map_union(validity,
585 isl_union_map_copy(ps->dep_forced));
586 if (ps->options->openmp) {
587 coincidence = isl_union_map_copy(validity);
588 coincidence = isl_union_map_union(coincidence,
589 isl_union_map_copy(ps->dep_order));
591 } else {
592 validity = isl_union_map_copy(ps->dep_flow);
593 validity = isl_union_map_union(validity,
594 isl_union_map_copy(ps->dep_false));
595 if (ps->options->openmp)
596 coincidence = isl_union_map_copy(validity);
598 if (ps->options->openmp)
599 sc = isl_schedule_constraints_set_coincidence(sc, coincidence);
600 sc = isl_schedule_constraints_set_validity(sc, validity);
601 sc = isl_schedule_constraints_set_proximity(sc,
602 isl_union_map_copy(ps->dep_flow));
604 return sc;
607 /* Compute a schedule for the scop "ps".
609 * First derive the appropriate schedule constraints from the dependences
610 * in "ps" and then compute a schedule from those schedule constraints.
612 static __isl_give isl_schedule *compute_cpu_schedule(struct ppcg_scop *ps)
614 isl_schedule_constraints *sc;
615 isl_schedule *schedule;
617 if (!ps)
618 return NULL;
620 sc = construct_cpu_schedule_constraints(ps);
622 if (ps->options->debug->dump_schedule_constraints)
623 isl_schedule_constraints_dump(sc);
624 schedule = isl_schedule_constraints_compute_schedule(sc);
626 return schedule;
629 /* Compute a new schedule to the scop "ps" if the reschedule option is set.
630 * Otherwise, return a copy of the original schedule.
632 static __isl_give isl_schedule *optionally_compute_schedule(void *user)
634 struct ppcg_scop *ps = user;
636 if (!ps)
637 return NULL;
638 if (!ps->options->reschedule)
639 return isl_schedule_copy(ps->schedule);
640 return compute_cpu_schedule(ps);
643 /* Compute a schedule based on the dependences in "ps" and
644 * tile it if requested by the user.
646 static __isl_give isl_schedule *get_schedule(struct ppcg_scop *ps,
647 struct ppcg_options *options)
649 isl_ctx *ctx;
650 isl_schedule *schedule;
652 if (!ps)
653 return NULL;
655 ctx = isl_union_set_get_ctx(ps->domain);
656 schedule = ppcg_get_schedule(ctx, options,
657 &optionally_compute_schedule, ps);
658 if (ps->options->tile)
659 schedule = isl_schedule_map_schedule_node_bottom_up(schedule,
660 &tile_band, ps);
662 return schedule;
665 /* Generate CPU code for the scop "ps" using "schedule" and
666 * print the corresponding C code to "p", including variable declarations.
668 static __isl_give isl_printer *print_cpu_with_schedule(
669 __isl_take isl_printer *p, struct ppcg_scop *ps,
670 __isl_take isl_schedule *schedule, struct ppcg_options *options)
672 int hidden;
673 isl_set *context;
675 p = isl_printer_start_line(p);
676 p = isl_printer_print_str(p, "/* ppcg generated CPU code */");
677 p = isl_printer_end_line(p);
679 p = isl_printer_start_line(p);
680 p = isl_printer_end_line(p);
682 p = ppcg_set_macro_names(p);
683 p = isl_ast_op_type_print_macro(isl_ast_op_fdiv_q, p);
684 p = ppcg_print_exposed_declarations(p, ps);
685 hidden = ppcg_scop_any_hidden_declarations(ps);
686 if (hidden) {
687 p = ppcg_start_block(p);
688 p = ppcg_print_hidden_declarations(p, ps);
691 context = isl_set_copy(ps->context);
692 context = isl_set_from_params(context);
693 schedule = isl_schedule_insert_context(schedule, context);
694 if (options->debug->dump_final_schedule)
695 isl_schedule_dump(schedule);
696 p = print_scop(ps, schedule, p, options);
697 if (hidden)
698 p = ppcg_end_block(p);
700 return p;
703 /* Generate CPU code for the scop "ps" and print the corresponding C code
704 * to "p", including variable declarations.
706 __isl_give isl_printer *print_cpu(__isl_take isl_printer *p,
707 struct ppcg_scop *ps, struct ppcg_options *options)
709 isl_schedule *schedule;
711 schedule = isl_schedule_copy(ps->schedule);
712 return print_cpu_with_schedule(p, ps, schedule, options);
715 /* Generate CPU code for "scop" and print it to "p".
717 * First obtain a schedule for "scop" and then print code for "scop"
718 * using that schedule.
720 static __isl_give isl_printer *generate(__isl_take isl_printer *p,
721 struct ppcg_scop *scop, struct ppcg_options *options)
723 isl_schedule *schedule;
725 schedule = get_schedule(scop, options);
727 return print_cpu_with_schedule(p, scop, schedule, options);
730 /* Wrapper around generate for use as a ppcg_transform callback.
732 static __isl_give isl_printer *print_cpu_wrap(__isl_take isl_printer *p,
733 struct ppcg_scop *scop, void *user)
735 struct ppcg_options *options = user;
737 return generate(p, scop, options);
740 /* Transform the code in the file called "input" by replacing
741 * all scops by corresponding CPU code and write the results to a file
742 * called "output".
744 int generate_cpu(isl_ctx *ctx, struct ppcg_options *options,
745 const char *input, const char *output)
747 FILE *output_file;
748 int r;
750 output_file = get_output_file(input, output);
751 if (!output_file)
752 return -1;
754 r = ppcg_transform(ctx, input, output_file, options,
755 &print_cpu_wrap, options);
757 fclose(output_file);
759 return r;