2 * Copyright 2012 INRIA Paris-Rocquencourt
4 * Use of this software is governed by the MIT license
6 * Written by Tobias Grosser, INRIA Paris-Rocquencourt,
7 * Domaine de Voluceau, Rocquenqourt, B.P. 105,
8 * 78153 Le Chesnay Cedex France
18 #include <isl/ast_build.h>
22 #include "ppcg_options.h"
26 /* Representation of a statement inside a generated AST.
28 * "stmt" refers to the original statement.
29 * "ref2expr" maps the reference identifier of each access in
30 * the statement to an AST expression that should be printed
31 * at the place of the access.
34 struct pet_stmt
*stmt
;
36 isl_id_to_ast_expr
*ref2expr
;
39 static void ppcg_stmt_free(void *user
)
41 struct ppcg_stmt
*stmt
= user
;
47 isl_id_to_ast_expr_free(stmt
->ref2expr
);
52 /* Derive the output file name from the input file name.
53 * 'input' is the entire path of the input file. The output
54 * is the file name plus the additional extension.
56 * We will basically replace everything after the last point
57 * with '.ppcg.c'. This means file.c becomes file.ppcg.c
59 static FILE *get_output_file(const char *input
, const char *output
)
63 const char ppcg_marker
[] = ".ppcg";
66 len
= ppcg_extract_base_name(name
, input
);
68 strcpy(name
+ len
, ppcg_marker
);
69 ext
= strrchr(input
, '.');
70 strcpy(name
+ len
+ sizeof(ppcg_marker
) - 1, ext
? ext
: ".c");
75 return fopen(output
, "w");
78 /* Data used to annotate for nodes in the ast.
80 struct ast_node_userinfo
{
81 /* The for node is an openmp parallel for node. */
85 /* Information used while building the ast.
87 struct ast_build_userinfo
{
88 /* The current ppcg scop. */
89 struct ppcg_scop
*scop
;
91 /* Are we currently in a parallel for loop? */
95 /* Check if the current scheduling dimension is parallel.
97 * We check for parallelism by verifying that the loop does not carry any
99 * If the live_range_reordering option is set, then this currently
100 * includes the order dependences. In principle, non-zero order dependences
101 * could be allowed, but this would require privatization and/or expansion.
103 * Parallelism test: if the distance is zero in all outer dimensions, then it
104 * has to be zero in the current dimension as well.
105 * Implementation: first, translate dependences into time space, then force
106 * outer dimensions to be equal. If the distance is zero in the current
107 * dimension, then the loop is parallel.
108 * The distance is zero in the current dimension if it is a subset of a map
109 * with equal values for the current dimension.
111 static int ast_schedule_dim_is_parallel(__isl_keep isl_ast_build
*build
,
112 struct ppcg_scop
*scop
)
114 isl_union_map
*schedule_node
, *schedule
, *deps
;
115 isl_map
*schedule_deps
, *test
;
116 isl_space
*schedule_space
;
117 unsigned i
, dimension
, is_parallel
;
119 schedule
= isl_ast_build_get_schedule(build
);
120 schedule_space
= isl_ast_build_get_schedule_space(build
);
122 dimension
= isl_space_dim(schedule_space
, isl_dim_out
) - 1;
124 deps
= isl_union_map_copy(scop
->dep_flow
);
125 deps
= isl_union_map_union(deps
, isl_union_map_copy(scop
->dep_false
));
126 if (scop
->options
->live_range_reordering
) {
127 isl_union_map
*order
= isl_union_map_copy(scop
->dep_order
);
128 deps
= isl_union_map_union(deps
, order
);
130 deps
= isl_union_map_apply_range(deps
, isl_union_map_copy(schedule
));
131 deps
= isl_union_map_apply_domain(deps
, schedule
);
133 if (isl_union_map_is_empty(deps
)) {
134 isl_union_map_free(deps
);
135 isl_space_free(schedule_space
);
139 schedule_deps
= isl_map_from_union_map(deps
);
141 for (i
= 0; i
< dimension
; i
++)
142 schedule_deps
= isl_map_equate(schedule_deps
, isl_dim_out
, i
,
145 test
= isl_map_universe(isl_map_get_space(schedule_deps
));
146 test
= isl_map_equate(test
, isl_dim_out
, dimension
, isl_dim_in
,
148 is_parallel
= isl_map_is_subset(schedule_deps
, test
);
150 isl_space_free(schedule_space
);
152 isl_map_free(schedule_deps
);
157 /* Mark a for node openmp parallel, if it is the outermost parallel for node.
159 static void mark_openmp_parallel(__isl_keep isl_ast_build
*build
,
160 struct ast_build_userinfo
*build_info
,
161 struct ast_node_userinfo
*node_info
)
163 if (build_info
->in_parallel_for
)
166 if (ast_schedule_dim_is_parallel(build
, build_info
->scop
)) {
167 build_info
->in_parallel_for
= 1;
168 node_info
->is_openmp
= 1;
172 /* Allocate an ast_node_info structure and initialize it with default values.
174 static struct ast_node_userinfo
*allocate_ast_node_userinfo()
176 struct ast_node_userinfo
*node_info
;
177 node_info
= (struct ast_node_userinfo
*)
178 malloc(sizeof(struct ast_node_userinfo
));
179 node_info
->is_openmp
= 0;
183 /* Free an ast_node_info structure.
185 static void free_ast_node_userinfo(void *ptr
)
187 struct ast_node_userinfo
*info
;
188 info
= (struct ast_node_userinfo
*) ptr
;
192 /* This method is executed before the construction of a for node. It creates
193 * an isl_id that is used to annotate the subsequently generated ast for nodes.
195 * In this function we also run the following analyses:
197 * - Detection of openmp parallel loops
199 static __isl_give isl_id
*ast_build_before_for(
200 __isl_keep isl_ast_build
*build
, void *user
)
203 struct ast_build_userinfo
*build_info
;
204 struct ast_node_userinfo
*node_info
;
206 build_info
= (struct ast_build_userinfo
*) user
;
207 node_info
= allocate_ast_node_userinfo();
208 id
= isl_id_alloc(isl_ast_build_get_ctx(build
), "", node_info
);
209 id
= isl_id_set_free_user(id
, free_ast_node_userinfo
);
211 mark_openmp_parallel(build
, build_info
, node_info
);
216 /* This method is executed after the construction of a for node.
218 * It performs the following actions:
220 * - Reset the 'in_parallel_for' flag, as soon as we leave a for node,
221 * that is marked as openmp parallel.
224 static __isl_give isl_ast_node
*ast_build_after_for(__isl_take isl_ast_node
*node
,
225 __isl_keep isl_ast_build
*build
, void *user
) {
227 struct ast_build_userinfo
*build_info
;
228 struct ast_node_userinfo
*info
;
230 id
= isl_ast_node_get_annotation(node
);
231 info
= isl_id_get_user(id
);
233 if (info
&& info
->is_openmp
) {
234 build_info
= (struct ast_build_userinfo
*) user
;
235 build_info
->in_parallel_for
= 0;
243 /* Find the element in scop->stmts that has the given "id".
245 static struct pet_stmt
*find_stmt(struct ppcg_scop
*scop
, __isl_keep isl_id
*id
)
249 for (i
= 0; i
< scop
->pet
->n_stmt
; ++i
) {
250 struct pet_stmt
*stmt
= scop
->pet
->stmts
[i
];
253 id_i
= isl_set_get_tuple_id(stmt
->domain
);
260 isl_die(isl_id_get_ctx(id
), isl_error_internal
,
261 "statement not found", return NULL
);
264 /* Print a user statement in the generated AST.
265 * The ppcg_stmt has been attached to the node in at_each_domain.
267 static __isl_give isl_printer
*print_user(__isl_take isl_printer
*p
,
268 __isl_take isl_ast_print_options
*print_options
,
269 __isl_keep isl_ast_node
*node
, void *user
)
271 struct ppcg_stmt
*stmt
;
274 id
= isl_ast_node_get_annotation(node
);
275 stmt
= isl_id_get_user(id
);
278 p
= pet_stmt_print_body(stmt
->stmt
, p
, stmt
->ref2expr
);
280 isl_ast_print_options_free(print_options
);
286 /* Print a for loop node as an openmp parallel loop.
288 * To print an openmp parallel loop we print a normal for loop, but add
289 * "#pragma openmp parallel for" in front.
291 * Variables that are declared within the body of this for loop are
292 * automatically openmp 'private'. Iterators declared outside of the
293 * for loop are automatically openmp 'shared'. As ppcg declares all iterators
294 * at the position where they are assigned, there is no need to explicitly mark
295 * variables. Their automatically assigned type is already correct.
297 * This function only generates valid OpenMP code, if the ast was generated
298 * with the 'atomic-bounds' option enabled.
301 static __isl_give isl_printer
*print_for_with_openmp(
302 __isl_keep isl_ast_node
*node
, __isl_take isl_printer
*p
,
303 __isl_take isl_ast_print_options
*print_options
)
305 p
= isl_printer_start_line(p
);
306 p
= isl_printer_print_str(p
, "#pragma omp parallel for");
307 p
= isl_printer_end_line(p
);
309 p
= isl_ast_node_for_print(node
, p
, print_options
);
316 * Depending on how the node is annotated, we either print a normal
317 * for node or an openmp parallel for node.
319 static __isl_give isl_printer
*print_for(__isl_take isl_printer
*p
,
320 __isl_take isl_ast_print_options
*print_options
,
321 __isl_keep isl_ast_node
*node
, void *user
)
323 struct ppcg_print_info
*print_info
;
328 id
= isl_ast_node_get_annotation(node
);
331 struct ast_node_userinfo
*info
;
333 info
= (struct ast_node_userinfo
*) isl_id_get_user(id
);
334 if (info
&& info
->is_openmp
)
339 p
= print_for_with_openmp(node
, p
, print_options
);
341 p
= isl_ast_node_for_print(node
, p
, print_options
);
348 /* Index transformation callback for pet_stmt_build_ast_exprs.
350 * "index" expresses the array indices in terms of statement iterators
351 * "iterator_map" expresses the statement iterators in terms of
352 * AST loop iterators.
354 * The result expresses the array indices in terms of
355 * AST loop iterators.
357 static __isl_give isl_multi_pw_aff
*pullback_index(
358 __isl_take isl_multi_pw_aff
*index
, __isl_keep isl_id
*id
, void *user
)
360 isl_pw_multi_aff
*iterator_map
= user
;
362 iterator_map
= isl_pw_multi_aff_copy(iterator_map
);
363 return isl_multi_pw_aff_pullback_pw_multi_aff(index
, iterator_map
);
366 /* Transform the accesses in the statement associated to the domain
367 * called by "node" to refer to the AST loop iterators, construct
368 * corresponding AST expressions using "build",
369 * collect them in a ppcg_stmt and annotate the node with the ppcg_stmt.
371 static __isl_give isl_ast_node
*at_each_domain(__isl_take isl_ast_node
*node
,
372 __isl_keep isl_ast_build
*build
, void *user
)
374 struct ppcg_scop
*scop
= user
;
375 isl_ast_expr
*expr
, *arg
;
379 isl_pw_multi_aff
*iterator_map
;
380 struct ppcg_stmt
*stmt
;
382 ctx
= isl_ast_node_get_ctx(node
);
383 stmt
= isl_calloc_type(ctx
, struct ppcg_stmt
);
387 expr
= isl_ast_node_user_get_expr(node
);
388 arg
= isl_ast_expr_get_op_arg(expr
, 0);
389 isl_ast_expr_free(expr
);
390 id
= isl_ast_expr_get_id(arg
);
391 isl_ast_expr_free(arg
);
392 stmt
->stmt
= find_stmt(scop
, id
);
397 map
= isl_map_from_union_map(isl_ast_build_get_schedule(build
));
398 map
= isl_map_reverse(map
);
399 iterator_map
= isl_pw_multi_aff_from_map(map
);
400 stmt
->ref2expr
= pet_stmt_build_ast_exprs(stmt
->stmt
, build
,
401 &pullback_index
, iterator_map
, NULL
, NULL
);
402 isl_pw_multi_aff_free(iterator_map
);
404 id
= isl_id_alloc(isl_ast_node_get_ctx(node
), NULL
, stmt
);
405 id
= isl_id_set_free_user(id
, &ppcg_stmt_free
);
406 return isl_ast_node_set_annotation(node
, id
);
408 ppcg_stmt_free(stmt
);
409 return isl_ast_node_free(node
);
412 /* Set *depth to the number of scheduling dimensions
413 * for the schedule of the first domain.
414 * We assume here that this number is the same for all domains.
416 static int set_depth(__isl_take isl_map
*map
, void *user
)
418 unsigned *depth
= user
;
420 *depth
= isl_map_dim(map
, isl_dim_out
);
426 /* Code generate the scop 'scop' and print the corresponding C code to 'p'.
428 static __isl_give isl_printer
*print_scop(struct ppcg_scop
*scop
,
429 __isl_take isl_printer
*p
, struct ppcg_options
*options
)
431 isl_ctx
*ctx
= isl_printer_get_ctx(p
);
433 isl_union_set
*domain_set
;
434 isl_union_map
*schedule_map
;
435 isl_ast_build
*build
;
436 isl_ast_print_options
*print_options
;
438 isl_id_list
*iterators
;
439 struct ast_build_userinfo build_info
;
442 context
= isl_set_copy(scop
->context
);
443 domain_set
= isl_union_set_copy(scop
->domain
);
444 schedule_map
= isl_schedule_get_map(scop
->schedule
);
445 schedule_map
= isl_union_map_intersect_domain(schedule_map
, domain_set
);
447 isl_union_map_foreach_map(schedule_map
, &set_depth
, &depth
);
449 build
= isl_ast_build_from_context(context
);
450 iterators
= ppcg_scop_generate_names(scop
, depth
, "c");
451 build
= isl_ast_build_set_iterators(build
, iterators
);
452 build
= isl_ast_build_set_at_each_domain(build
, &at_each_domain
, scop
);
454 if (options
->openmp
) {
455 build_info
.scop
= scop
;
456 build_info
.in_parallel_for
= 0;
458 build
= isl_ast_build_set_before_each_for(build
,
459 &ast_build_before_for
,
461 build
= isl_ast_build_set_after_each_for(build
,
462 &ast_build_after_for
,
466 tree
= isl_ast_build_node_from_schedule_map(build
, schedule_map
);
467 isl_ast_build_free(build
);
469 print_options
= isl_ast_print_options_alloc(ctx
);
470 print_options
= isl_ast_print_options_set_print_user(print_options
,
473 print_options
= isl_ast_print_options_set_print_for(print_options
,
476 p
= isl_ast_node_print_macros(tree
, p
);
477 p
= isl_ast_node_print(tree
, p
, print_options
);
479 isl_ast_node_free(tree
);
484 /* Does "scop" refer to any arrays that are declared, but not
485 * exposed to the code after the scop?
487 static int any_hidden_declarations(struct ppcg_scop
*scop
)
494 for (i
= 0; i
< scop
->pet
->n_array
; ++i
)
495 if (scop
->pet
->arrays
[i
]->declared
&&
496 !scop
->pet
->arrays
[i
]->exposed
)
502 /* Generate CPU code for the scop "ps" and print the corresponding C code
503 * to "p", including variable declarations.
505 __isl_give isl_printer
*print_cpu(__isl_take isl_printer
*p
,
506 struct ppcg_scop
*ps
, struct ppcg_options
*options
)
510 p
= isl_printer_start_line(p
);
511 p
= isl_printer_print_str(p
, "/* ppcg generated CPU code */");
512 p
= isl_printer_end_line(p
);
514 p
= isl_printer_start_line(p
);
515 p
= isl_printer_end_line(p
);
517 p
= ppcg_print_exposed_declarations(p
, ps
);
518 hidden
= any_hidden_declarations(ps
);
520 p
= ppcg_start_block(p
);
521 p
= ppcg_print_hidden_declarations(p
, ps
);
523 if (options
->debug
->dump_final_schedule
)
524 isl_schedule_dump(ps
->schedule
);
525 p
= print_scop(ps
, p
, options
);
527 p
= ppcg_end_block(p
);
532 /* Wrapper around print_cpu for use as a ppcg_transform callback.
534 static __isl_give isl_printer
*print_cpu_wrap(__isl_take isl_printer
*p
,
535 struct ppcg_scop
*scop
, void *user
)
537 struct ppcg_options
*options
= user
;
539 return print_cpu(p
, scop
, options
);
542 /* Transform the code in the file called "input" by replacing
543 * all scops by corresponding CPU code and write the results to a file
546 int generate_cpu(isl_ctx
*ctx
, struct ppcg_options
*options
,
547 const char *input
, const char *output
)
552 output_file
= get_output_file(input
, output
);
554 r
= ppcg_transform(ctx
, input
, output_file
, options
,
555 &print_cpu_wrap
, options
);