2 * Copyright 2012 INRIA Paris-Rocquencourt
4 * Use of this software is governed by the MIT license
6 * Written by Tobias Grosser, INRIA Paris-Rocquencourt,
7 * Domaine de Voluceau, Rocquenqourt, B.P. 105,
8 * 78153 Le Chesnay Cedex France
18 #include <isl/ast_build.h>
22 #include "ppcg_options.h"
24 #include "pet_printer.h"
28 /* Representation of a statement inside a generated AST.
30 * "stmt" refers to the original statement.
31 * "n_access" is the number of accesses in the statement.
32 * "access" is the list of accesses transformed to refer to the iterators
33 * in the generated AST.
36 struct pet_stmt
*stmt
;
39 isl_ast_expr_list
**access
;
42 static void ppcg_stmt_free(void *user
)
44 struct ppcg_stmt
*stmt
= user
;
50 for (i
= 0; i
< stmt
->n_access
; ++i
)
51 isl_ast_expr_list_free(stmt
->access
[i
]);
57 /* Derive the output file name from the input file name.
58 * 'input' is the entire path of the input file. The output
59 * is the file name plus the additional extension.
61 * We will basically replace everything after the last point
62 * with '.ppcg.c'. This means file.c becomes file.ppcg.c
64 static FILE *get_output_file(const char *input
, const char *output
)
69 const char ppcg_marker
[] = ".ppcg";
72 base
= strrchr(input
, '/');
77 ext
= strrchr(base
, '.');
78 len
= ext
? ext
- base
: strlen(base
);
80 memcpy(name
, base
, len
);
81 strcpy(name
+ len
, ppcg_marker
);
82 strcpy(name
+ len
+ sizeof(ppcg_marker
) - 1, ext
? ext
: ".c");
87 return fopen(output
, "w");
90 /* Data used to annotate for nodes in the ast.
92 struct ast_node_userinfo
{
93 /* The for node is an openmp parallel for node. */
97 /* Information used while building the ast.
99 struct ast_build_userinfo
{
100 /* The current ppcg scop. */
101 struct ppcg_scop
*scop
;
103 /* Are we currently in a parallel for loop? */
107 /* Check if the current scheduling dimension is parallel.
109 * We check for parallelism by verifying that the loop does not carry any
112 * Parallelism test: if the distance is zero in all outer dimensions, then it
113 * has to be zero in the current dimension as well.
114 * Implementation: first, translate dependences into time space, then force
115 * outer dimensions to be equal. If the distance is zero in the current
116 * dimension, then the loop is parallel.
117 * The distance is zero in the current dimension if it is a subset of a map
118 * with equal values for the current dimension.
120 static int ast_schedule_dim_is_parallel(__isl_keep isl_ast_build
*build
,
121 struct ppcg_scop
*scop
)
123 isl_union_map
*schedule_node
, *schedule
, *deps
;
124 isl_map
*schedule_deps
, *test
;
125 isl_space
*schedule_space
;
126 unsigned i
, dimension
, is_parallel
;
128 schedule
= isl_ast_build_get_schedule(build
);
129 schedule_space
= isl_ast_build_get_schedule_space(build
);
131 dimension
= isl_space_dim(schedule_space
, isl_dim_out
) - 1;
133 deps
= isl_union_map_copy(scop
->dep_flow
);
134 deps
= isl_union_map_union(deps
, isl_union_map_copy(scop
->dep_false
));
135 deps
= isl_union_map_apply_range(deps
, isl_union_map_copy(schedule
));
136 deps
= isl_union_map_apply_domain(deps
, schedule
);
138 if (isl_union_map_is_empty(deps
)) {
139 isl_union_map_free(deps
);
140 isl_space_free(schedule_space
);
144 schedule_deps
= isl_map_from_union_map(deps
);
146 for (i
= 0; i
< dimension
; i
++)
147 schedule_deps
= isl_map_equate(schedule_deps
, isl_dim_out
, i
,
150 test
= isl_map_universe(isl_map_get_space(schedule_deps
));
151 test
= isl_map_equate(test
, isl_dim_out
, dimension
, isl_dim_in
,
153 is_parallel
= isl_map_is_subset(schedule_deps
, test
);
155 isl_space_free(schedule_space
);
157 isl_map_free(schedule_deps
);
162 /* Mark a for node openmp parallel, if it is the outermost parallel for node.
164 static void mark_openmp_parallel(__isl_keep isl_ast_build
*build
,
165 struct ast_build_userinfo
*build_info
,
166 struct ast_node_userinfo
*node_info
)
168 if (build_info
->in_parallel_for
)
171 if (ast_schedule_dim_is_parallel(build
, build_info
->scop
)) {
172 build_info
->in_parallel_for
= 1;
173 node_info
->is_openmp
= 1;
177 /* Allocate an ast_node_info structure and initialize it with default values.
179 static struct ast_node_userinfo
*allocate_ast_node_userinfo()
181 struct ast_node_userinfo
*node_info
;
182 node_info
= (struct ast_node_userinfo
*)
183 malloc(sizeof(struct ast_node_userinfo
));
184 node_info
->is_openmp
= 0;
188 /* Free an ast_node_info structure.
190 static void free_ast_node_userinfo(void *ptr
)
192 struct ast_node_userinfo
*info
;
193 info
= (struct ast_node_userinfo
*) ptr
;
197 /* This method is executed before the construction of a for node. It creates
198 * an isl_id that is used to annotate the subsequently generated ast for nodes.
200 * In this function we also run the following analyses:
202 * - Detection of openmp parallel loops
204 static __isl_give isl_id
*ast_build_before_for(
205 __isl_keep isl_ast_build
*build
, void *user
)
208 struct ast_build_userinfo
*build_info
;
209 struct ast_node_userinfo
*node_info
;
211 build_info
= (struct ast_build_userinfo
*) user
;
212 node_info
= allocate_ast_node_userinfo();
213 id
= isl_id_alloc(isl_ast_build_get_ctx(build
), "", node_info
);
214 id
= isl_id_set_free_user(id
, free_ast_node_userinfo
);
216 mark_openmp_parallel(build
, build_info
, node_info
);
221 /* This method is executed after the construction of a for node.
223 * It performs the following actions:
225 * - Reset the 'in_parallel_for' flag, as soon as we leave a for node,
226 * that is marked as openmp parallel.
229 static __isl_give isl_ast_node
*ast_build_after_for(__isl_take isl_ast_node
*node
,
230 __isl_keep isl_ast_build
*build
, void *user
) {
232 struct ast_build_userinfo
*build_info
;
233 struct ast_node_userinfo
*info
;
235 id
= isl_ast_node_get_annotation(node
);
236 info
= isl_id_get_user(id
);
238 if (info
&& info
->is_openmp
) {
239 build_info
= (struct ast_build_userinfo
*) user
;
240 build_info
->in_parallel_for
= 0;
248 /* Print a memory access 'access' to the printer 'p'.
250 * "expr" refers to the original access.
251 * "access" is the list of index expressions transformed to refer
252 * to the iterators of the generated AST.
254 * In case the original access is unnamed (and presumably single-dimensional),
255 * we assume this is not a memory access, but just an expression.
257 static __isl_give isl_printer
*print_access(__isl_take isl_printer
*p
,
258 struct pet_expr
*expr
, __isl_keep isl_ast_expr_list
*access
)
264 n_index
= isl_ast_expr_list_n_ast_expr(access
);
265 name
= isl_map_get_tuple_name(expr
->acc
.access
, isl_dim_out
);
269 index
= isl_ast_expr_list_get_ast_expr(access
, 0);
270 p
= isl_printer_print_str(p
, "(");
271 p
= isl_printer_print_ast_expr(p
, index
);
272 p
= isl_printer_print_str(p
, ")");
273 isl_ast_expr_free(index
);
277 p
= isl_printer_print_str(p
, name
);
279 for (i
= 0; i
< n_index
; ++i
) {
282 index
= isl_ast_expr_list_get_ast_expr(access
, i
);
284 p
= isl_printer_print_str(p
, "[");
285 p
= isl_printer_print_ast_expr(p
, index
);
286 p
= isl_printer_print_str(p
, "]");
287 isl_ast_expr_free(index
);
293 /* Find the element in scop->stmts that has the given "id".
295 static struct pet_stmt
*find_stmt(struct ppcg_scop
*scop
, __isl_keep isl_id
*id
)
299 for (i
= 0; i
< scop
->n_stmt
; ++i
) {
300 struct pet_stmt
*stmt
= scop
->stmts
[i
];
303 id_i
= isl_set_get_tuple_id(stmt
->domain
);
310 isl_die(isl_id_get_ctx(id
), isl_error_internal
,
311 "statement not found", return NULL
);
314 /* To print the transformed accesses we walk the list of transformed accesses
315 * simultaneously with the pet printer. This means that whenever
316 * the pet printer prints a pet access expression we have
317 * the corresponding transformed access available for printing.
319 static __isl_give isl_printer
*print_access_expr(__isl_take isl_printer
*p
,
320 struct pet_expr
*expr
, void *user
)
322 isl_ast_expr_list
***access
= user
;
324 p
= print_access(p
, expr
, **access
);
330 /* Print a user statement in the generated AST.
331 * The ppcg_stmt has been attached to the node in at_each_domain.
333 static __isl_give isl_printer
*print_user(__isl_take isl_printer
*p
,
334 __isl_take isl_ast_print_options
*print_options
,
335 __isl_keep isl_ast_node
*node
, void *user
)
337 struct ppcg_stmt
*stmt
;
338 isl_ast_expr_list
**access
;
341 id
= isl_ast_node_get_annotation(node
);
342 stmt
= isl_id_get_user(id
);
345 access
= stmt
->access
;
347 p
= isl_printer_start_line(p
);
348 p
= print_pet_expr(p
, stmt
->stmt
->body
, &print_access_expr
, &access
);
349 p
= isl_printer_print_str(p
, ";");
350 p
= isl_printer_end_line(p
);
352 isl_ast_print_options_free(print_options
);
358 /* Print a for loop node as an openmp parallel loop.
360 * To print an openmp parallel loop we print a normal for loop, but add
361 * "#pragma openmp parallel for" in front.
363 * Variables that are declared within the body of this for loop are
364 * automatically openmp 'private'. Iterators declared outside of the
365 * for loop are automatically openmp 'shared'. As ppcg declares all iterators
366 * at the position where they are assigned, there is no need to explicitly mark
367 * variables. Their automatically assigned type is already correct.
369 * This function only generates valid OpenMP code, if the ast was generated
370 * with the 'atomic-bounds' option enabled.
373 static __isl_give isl_printer
*print_for_with_openmp(
374 __isl_keep isl_ast_node
*node
, __isl_take isl_printer
*p
,
375 __isl_take isl_ast_print_options
*print_options
)
377 p
= isl_printer_start_line(p
);
378 p
= isl_printer_print_str(p
, "#pragma omp parallel for");
379 p
= isl_printer_end_line(p
);
381 p
= isl_ast_node_for_print(node
, p
, print_options
);
388 * Depending on how the node is annotated, we either print a normal
389 * for node or an openmp parallel for node.
391 static __isl_give isl_printer
*print_for(__isl_take isl_printer
*p
,
392 __isl_take isl_ast_print_options
*print_options
,
393 __isl_keep isl_ast_node
*node
, void *user
)
395 struct ppcg_print_info
*print_info
;
400 id
= isl_ast_node_get_annotation(node
);
403 struct ast_node_userinfo
*info
;
405 info
= (struct ast_node_userinfo
*) isl_id_get_user(id
);
406 if (info
&& info
->is_openmp
)
411 p
= print_for_with_openmp(node
, p
, print_options
);
413 p
= isl_ast_node_for_print(node
, p
, print_options
);
420 /* Call "fn" on each access expression in "expr".
422 static int foreach_access_expr(struct pet_expr
*expr
,
423 int (*fn
)(struct pet_expr
*expr
, void *user
), void *user
)
430 if (expr
->type
== pet_expr_access
)
431 return fn(expr
, user
);
433 for (i
= 0; i
< expr
->n_arg
; ++i
)
434 if (foreach_access_expr(expr
->args
[i
], fn
, user
) < 0)
440 static int inc_n_access(struct pet_expr
*expr
, void *user
)
442 struct ppcg_stmt
*stmt
= user
;
447 /* Internal data for add_access.
449 * "stmt" is the statement to which an access needs to be added.
450 * "build" is the current AST build.
451 * "map" maps the AST loop iterators to the iteration domain of the statement.
453 struct ppcg_add_access_data
{
454 struct ppcg_stmt
*stmt
;
455 isl_ast_build
*build
;
459 /* Given an access expression, add it to data->stmt after
460 * transforming it to refer to the AST loop iterators.
462 static int add_access(struct pet_expr
*expr
, void *user
)
467 isl_pw_multi_aff
*pma
;
468 struct ppcg_add_access_data
*data
= user
;
469 isl_ast_expr_list
*index
;
471 ctx
= isl_map_get_ctx(expr
->acc
.access
);
472 n
= isl_map_dim(expr
->acc
.access
, isl_dim_out
);
473 access
= isl_map_copy(expr
->acc
.access
);
474 access
= isl_map_apply_range(isl_map_copy(data
->map
), access
);
475 pma
= isl_pw_multi_aff_from_map(access
);
476 pma
= isl_pw_multi_aff_coalesce(pma
);
478 index
= isl_ast_expr_list_alloc(ctx
, n
);
479 for (i
= 0; i
< n
; ++i
) {
483 pa
= isl_pw_multi_aff_get_pw_aff(pma
, i
);
484 expr
= isl_ast_build_expr_from_pw_aff(data
->build
, pa
);
485 index
= isl_ast_expr_list_add(index
, expr
);
487 isl_pw_multi_aff_free(pma
);
489 data
->stmt
->access
[data
->stmt
->n_access
] = index
;
490 data
->stmt
->n_access
++;
494 /* Transform the accesses in the statement associated to the domain
495 * called by "node" to refer to the AST loop iterators,
496 * collect them in a ppcg_stmt and annotate the node with the ppcg_stmt.
498 static __isl_give isl_ast_node
*at_each_domain(__isl_take isl_ast_node
*node
,
499 __isl_keep isl_ast_build
*build
, void *user
)
501 struct ppcg_scop
*scop
= user
;
502 isl_ast_expr
*expr
, *arg
;
506 struct ppcg_stmt
*stmt
;
507 struct ppcg_add_access_data data
;
509 ctx
= isl_ast_node_get_ctx(node
);
510 stmt
= isl_calloc_type(ctx
, struct ppcg_stmt
);
514 expr
= isl_ast_node_user_get_expr(node
);
515 arg
= isl_ast_expr_get_op_arg(expr
, 0);
516 isl_ast_expr_free(expr
);
517 id
= isl_ast_expr_get_id(arg
);
518 isl_ast_expr_free(arg
);
519 stmt
->stmt
= find_stmt(scop
, id
);
525 if (foreach_access_expr(stmt
->stmt
->body
, &inc_n_access
, stmt
) < 0)
528 stmt
->access
= isl_calloc_array(ctx
, isl_ast_expr_list
*,
533 map
= isl_map_from_union_map(isl_ast_build_get_schedule(build
));
534 map
= isl_map_reverse(map
);
540 if (foreach_access_expr(stmt
->stmt
->body
, &add_access
, &data
) < 0)
541 node
= isl_ast_node_free(node
);
545 id
= isl_id_alloc(isl_ast_node_get_ctx(node
), NULL
, stmt
);
546 id
= isl_id_set_free_user(id
, &ppcg_stmt_free
);
547 return isl_ast_node_set_annotation(node
, id
);
549 ppcg_stmt_free(stmt
);
550 return isl_ast_node_free(node
);
553 /* Code generate the scop 'scop' and print the corresponding C code to 'p'.
555 static __isl_give isl_printer
*print_scop(struct ppcg_scop
*scop
,
556 __isl_take isl_printer
*p
, struct ppcg_options
*options
)
558 isl_ctx
*ctx
= isl_printer_get_ctx(p
);
560 isl_union_set
*domain_set
;
561 isl_union_map
*schedule_map
;
562 isl_ast_build
*build
;
563 isl_ast_print_options
*print_options
;
565 struct ast_build_userinfo build_info
;
567 context
= isl_set_copy(scop
->context
);
568 domain_set
= isl_union_set_copy(scop
->domain
);
569 schedule_map
= isl_union_map_copy(scop
->schedule
);
570 schedule_map
= isl_union_map_intersect_domain(schedule_map
, domain_set
);
572 build
= isl_ast_build_from_context(context
);
573 build
= isl_ast_build_set_at_each_domain(build
, &at_each_domain
, scop
);
575 if (options
->openmp
) {
576 build_info
.scop
= scop
;
577 build_info
.in_parallel_for
= 0;
579 build
= isl_ast_build_set_before_each_for(build
,
580 &ast_build_before_for
,
582 build
= isl_ast_build_set_after_each_for(build
,
583 &ast_build_after_for
,
587 tree
= isl_ast_build_ast_from_schedule(build
, schedule_map
);
588 isl_ast_build_free(build
);
590 print_options
= isl_ast_print_options_alloc(ctx
);
591 print_options
= isl_ast_print_options_set_print_user(print_options
,
594 print_options
= isl_ast_print_options_set_print_for(print_options
,
597 p
= isl_ast_node_print_macros(tree
, p
);
598 p
= isl_ast_node_print(tree
, p
, print_options
);
600 isl_ast_node_free(tree
);
605 /* Does "scop" refer to any arrays that are declared, but not
606 * exposed to the code after the scop?
608 static int any_hidden_declarations(struct ppcg_scop
*scop
)
615 for (i
= 0; i
< scop
->n_array
; ++i
)
616 if (scop
->arrays
[i
]->declared
&& !scop
->arrays
[i
]->exposed
)
622 /* Generate CPU code for the scop "ps" and print the corresponding C code
623 * to "p", including variable declarations.
625 __isl_give isl_printer
*print_cpu(__isl_take isl_printer
*p
,
626 struct ppcg_scop
*ps
, struct ppcg_options
*options
)
630 p
= isl_printer_start_line(p
);
631 p
= isl_printer_print_str(p
, "/* ppcg generated CPU code */");
632 p
= isl_printer_end_line(p
);
634 p
= isl_printer_start_line(p
);
635 p
= isl_printer_end_line(p
);
637 p
= ppcg_print_exposed_declarations(p
, ps
);
638 hidden
= any_hidden_declarations(ps
);
640 p
= ppcg_start_block(p
);
641 p
= ppcg_print_hidden_declarations(p
, ps
);
643 p
= print_scop(ps
, p
, options
);
645 p
= ppcg_end_block(p
);
650 int generate_cpu(isl_ctx
*ctx
, struct ppcg_scop
*ps
,
651 struct ppcg_options
*options
, const char *input
, const char *output
)
660 input_file
= fopen(input
, "r");
661 output_file
= get_output_file(input
, output
);
663 copy(input_file
, output_file
, 0, ps
->start
);
664 p
= isl_printer_to_file(ctx
, output_file
);
665 p
= isl_printer_set_output_format(p
, ISL_FORMAT_C
);
666 p
= print_cpu(p
, ps
, options
);
668 copy(input_file
, output_file
, ps
->end
, -1);