ppcg_scop_from_pet_scop: abort on data dependent conditions
[ppcg.git] / cpu.c
blobd6f3b863ebe51d05d465fe43e31e4feb65b62491
1 /*
2 * Copyright 2012 INRIA Paris-Rocquencourt
4 * Use of this software is governed by the MIT license
6 * Written by Tobias Grosser, INRIA Paris-Rocquencourt,
7 * Domaine de Voluceau, Rocquenqourt, B.P. 105,
8 * 78153 Le Chesnay Cedex France
9 */
11 #include <limits.h>
12 #include <stdio.h>
13 #include <string.h>
15 #include <isl/aff.h>
16 #include <isl/ctx.h>
17 #include <isl/map.h>
18 #include <isl/ast_build.h>
19 #include <pet.h>
21 #include "ppcg.h"
22 #include "ppcg_options.h"
23 #include "cpu.h"
24 #include "pet_printer.h"
25 #include "print.h"
26 #include "rewrite.h"
28 /* Representation of a statement inside a generated AST.
30 * "stmt" refers to the original statement.
31 * "n_access" is the number of accesses in the statement.
32 * "access" is the list of accesses transformed to refer to the iterators
33 * in the generated AST.
35 struct ppcg_stmt {
36 struct pet_stmt *stmt;
38 int n_access;
39 isl_ast_expr_list **access;
42 static void ppcg_stmt_free(void *user)
44 struct ppcg_stmt *stmt = user;
45 int i;
47 if (!stmt)
48 return;
50 for (i = 0; i < stmt->n_access; ++i)
51 isl_ast_expr_list_free(stmt->access[i]);
53 free(stmt->access);
54 free(stmt);
57 /* Derive the output file name from the input file name.
58 * 'input' is the entire path of the input file. The output
59 * is the file name plus the additional extension.
61 * We will basically replace everything after the last point
62 * with '.ppcg.c'. This means file.c becomes file.ppcg.c
64 static FILE *get_output_file(const char *input, const char *output)
66 char name[PATH_MAX];
67 const char *base;
68 const char *ext;
69 const char ppcg_marker[] = ".ppcg";
70 int len;
72 base = strrchr(input, '/');
73 if (base)
74 base++;
75 else
76 base = input;
77 ext = strrchr(base, '.');
78 len = ext ? ext - base : strlen(base);
80 memcpy(name, base, len);
81 strcpy(name + len, ppcg_marker);
82 strcpy(name + len + sizeof(ppcg_marker) - 1, ext ? ext : ".c");
84 if (!output)
85 output = name;
87 return fopen(output, "w");
90 /* Data used to annotate for nodes in the ast.
92 struct ast_node_userinfo {
93 /* The for node is an openmp parallel for node. */
94 int is_openmp;
97 /* Information used while building the ast.
99 struct ast_build_userinfo {
100 /* The current ppcg scop. */
101 struct ppcg_scop *scop;
103 /* Are we currently in a parallel for loop? */
104 int in_parallel_for;
107 /* Check if the current scheduling dimension is parallel.
109 * We check for parallelism by verifying that the loop does not carry any
110 * dependences.
112 * Parallelism test: if the distance is zero in all outer dimensions, then it
113 * has to be zero in the current dimension as well.
114 * Implementation: first, translate dependences into time space, then force
115 * outer dimensions to be equal. If the distance is zero in the current
116 * dimension, then the loop is parallel.
117 * The distance is zero in the current dimension if it is a subset of a map
118 * with equal values for the current dimension.
120 static int ast_schedule_dim_is_parallel(__isl_keep isl_ast_build *build,
121 struct ppcg_scop *scop)
123 isl_union_map *schedule_node, *schedule, *deps;
124 isl_map *schedule_deps, *test;
125 isl_space *schedule_space;
126 unsigned i, dimension, is_parallel;
128 schedule = isl_ast_build_get_schedule(build);
129 schedule_space = isl_ast_build_get_schedule_space(build);
131 dimension = isl_space_dim(schedule_space, isl_dim_out) - 1;
133 deps = isl_union_map_copy(scop->dep_flow);
134 deps = isl_union_map_union(deps, isl_union_map_copy(scop->dep_false));
135 deps = isl_union_map_apply_range(deps, isl_union_map_copy(schedule));
136 deps = isl_union_map_apply_domain(deps, schedule);
138 if (isl_union_map_is_empty(deps)) {
139 isl_union_map_free(deps);
140 isl_space_free(schedule_space);
141 return 1;
144 schedule_deps = isl_map_from_union_map(deps);
146 for (i = 0; i < dimension; i++)
147 schedule_deps = isl_map_equate(schedule_deps, isl_dim_out, i,
148 isl_dim_in, i);
150 test = isl_map_universe(isl_map_get_space(schedule_deps));
151 test = isl_map_equate(test, isl_dim_out, dimension, isl_dim_in,
152 dimension);
153 is_parallel = isl_map_is_subset(schedule_deps, test);
155 isl_space_free(schedule_space);
156 isl_map_free(test);
157 isl_map_free(schedule_deps);
159 return is_parallel;
162 /* Mark a for node openmp parallel, if it is the outermost parallel for node.
164 static void mark_openmp_parallel(__isl_keep isl_ast_build *build,
165 struct ast_build_userinfo *build_info,
166 struct ast_node_userinfo *node_info)
168 if (build_info->in_parallel_for)
169 return;
171 if (ast_schedule_dim_is_parallel(build, build_info->scop)) {
172 build_info->in_parallel_for = 1;
173 node_info->is_openmp = 1;
177 /* Allocate an ast_node_info structure and initialize it with default values.
179 static struct ast_node_userinfo *allocate_ast_node_userinfo()
181 struct ast_node_userinfo *node_info;
182 node_info = (struct ast_node_userinfo *)
183 malloc(sizeof(struct ast_node_userinfo));
184 node_info->is_openmp = 0;
185 return node_info;
188 /* Free an ast_node_info structure.
190 static void free_ast_node_userinfo(void *ptr)
192 struct ast_node_userinfo *info;
193 info = (struct ast_node_userinfo *) ptr;
194 free(info);
197 /* This method is executed before the construction of a for node. It creates
198 * an isl_id that is used to annotate the subsequently generated ast for nodes.
200 * In this function we also run the following analyses:
202 * - Detection of openmp parallel loops
204 static __isl_give isl_id *ast_build_before_for(
205 __isl_keep isl_ast_build *build, void *user)
207 isl_id *id;
208 struct ast_build_userinfo *build_info;
209 struct ast_node_userinfo *node_info;
211 build_info = (struct ast_build_userinfo *) user;
212 node_info = allocate_ast_node_userinfo();
213 id = isl_id_alloc(isl_ast_build_get_ctx(build), "", node_info);
214 id = isl_id_set_free_user(id, free_ast_node_userinfo);
216 mark_openmp_parallel(build, build_info, node_info);
218 return id;
221 /* This method is executed after the construction of a for node.
223 * It performs the following actions:
225 * - Reset the 'in_parallel_for' flag, as soon as we leave a for node,
226 * that is marked as openmp parallel.
229 static __isl_give isl_ast_node *ast_build_after_for(__isl_take isl_ast_node *node,
230 __isl_keep isl_ast_build *build, void *user) {
231 isl_id *id;
232 struct ast_build_userinfo *build_info;
233 struct ast_node_userinfo *info;
235 id = isl_ast_node_get_annotation(node);
236 info = isl_id_get_user(id);
238 if (info && info->is_openmp) {
239 build_info = (struct ast_build_userinfo *) user;
240 build_info->in_parallel_for = 0;
243 isl_id_free(id);
245 return node;
248 /* Print a memory access 'access' to the printer 'p'.
250 * "expr" refers to the original access.
251 * "access" is the list of index expressions transformed to refer
252 * to the iterators of the generated AST.
254 * In case the original access is unnamed (and presumably single-dimensional),
255 * we assume this is not a memory access, but just an expression.
257 static __isl_give isl_printer *print_access(__isl_take isl_printer *p,
258 struct pet_expr *expr, __isl_keep isl_ast_expr_list *access)
260 int i;
261 const char *name;
262 unsigned n_index;
264 n_index = isl_ast_expr_list_n_ast_expr(access);
265 name = isl_map_get_tuple_name(expr->acc.access, isl_dim_out);
267 if (name == NULL) {
268 isl_ast_expr *index;
269 index = isl_ast_expr_list_get_ast_expr(access, 0);
270 p = isl_printer_print_str(p, "(");
271 p = isl_printer_print_ast_expr(p, index);
272 p = isl_printer_print_str(p, ")");
273 isl_ast_expr_free(index);
274 return p;
277 p = isl_printer_print_str(p, name);
279 for (i = 0; i < n_index; ++i) {
280 isl_ast_expr *index;
282 index = isl_ast_expr_list_get_ast_expr(access, i);
284 p = isl_printer_print_str(p, "[");
285 p = isl_printer_print_ast_expr(p, index);
286 p = isl_printer_print_str(p, "]");
287 isl_ast_expr_free(index);
290 return p;
293 /* Find the element in scop->stmts that has the given "id".
295 static struct pet_stmt *find_stmt(struct ppcg_scop *scop, __isl_keep isl_id *id)
297 int i;
299 for (i = 0; i < scop->n_stmt; ++i) {
300 struct pet_stmt *stmt = scop->stmts[i];
301 isl_id *id_i;
303 id_i = isl_set_get_tuple_id(stmt->domain);
304 isl_id_free(id_i);
306 if (id_i == id)
307 return stmt;
310 isl_die(isl_id_get_ctx(id), isl_error_internal,
311 "statement not found", return NULL);
314 /* To print the transformed accesses we walk the list of transformed accesses
315 * simultaneously with the pet printer. This means that whenever
316 * the pet printer prints a pet access expression we have
317 * the corresponding transformed access available for printing.
319 static __isl_give isl_printer *print_access_expr(__isl_take isl_printer *p,
320 struct pet_expr *expr, void *user)
322 isl_ast_expr_list ***access = user;
324 p = print_access(p, expr, **access);
325 (*access)++;
327 return p;
330 /* Print a user statement in the generated AST.
331 * The ppcg_stmt has been attached to the node in at_each_domain.
333 static __isl_give isl_printer *print_user(__isl_take isl_printer *p,
334 __isl_take isl_ast_print_options *print_options,
335 __isl_keep isl_ast_node *node, void *user)
337 struct ppcg_stmt *stmt;
338 isl_ast_expr_list **access;
339 isl_id *id;
341 id = isl_ast_node_get_annotation(node);
342 stmt = isl_id_get_user(id);
343 isl_id_free(id);
345 access = stmt->access;
347 p = isl_printer_start_line(p);
348 p = print_pet_expr(p, stmt->stmt->body, &print_access_expr, &access);
349 p = isl_printer_print_str(p, ";");
350 p = isl_printer_end_line(p);
352 isl_ast_print_options_free(print_options);
354 return p;
358 /* Print a for loop node as an openmp parallel loop.
360 * To print an openmp parallel loop we print a normal for loop, but add
361 * "#pragma openmp parallel for" in front.
363 * Variables that are declared within the body of this for loop are
364 * automatically openmp 'private'. Iterators declared outside of the
365 * for loop are automatically openmp 'shared'. As ppcg declares all iterators
366 * at the position where they are assigned, there is no need to explicitly mark
367 * variables. Their automatically assigned type is already correct.
369 * This function only generates valid OpenMP code, if the ast was generated
370 * with the 'atomic-bounds' option enabled.
373 static __isl_give isl_printer *print_for_with_openmp(
374 __isl_keep isl_ast_node *node, __isl_take isl_printer *p,
375 __isl_take isl_ast_print_options *print_options)
377 p = isl_printer_start_line(p);
378 p = isl_printer_print_str(p, "#pragma omp parallel for");
379 p = isl_printer_end_line(p);
381 p = isl_ast_node_for_print(node, p, print_options);
383 return p;
386 /* Print a for node.
388 * Depending on how the node is annotated, we either print a normal
389 * for node or an openmp parallel for node.
391 static __isl_give isl_printer *print_for(__isl_take isl_printer *p,
392 __isl_take isl_ast_print_options *print_options,
393 __isl_keep isl_ast_node *node, void *user)
395 struct ppcg_print_info *print_info;
396 isl_id *id;
397 int openmp;
399 openmp = 0;
400 id = isl_ast_node_get_annotation(node);
402 if (id) {
403 struct ast_node_userinfo *info;
405 info = (struct ast_node_userinfo *) isl_id_get_user(id);
406 if (info && info->is_openmp)
407 openmp = 1;
410 if (openmp)
411 p = print_for_with_openmp(node, p, print_options);
412 else
413 p = isl_ast_node_for_print(node, p, print_options);
415 isl_id_free(id);
417 return p;
420 /* Call "fn" on each access expression in "expr".
422 static int foreach_access_expr(struct pet_expr *expr,
423 int (*fn)(struct pet_expr *expr, void *user), void *user)
425 int i;
427 if (!expr)
428 return -1;
430 if (expr->type == pet_expr_access)
431 return fn(expr, user);
433 for (i = 0; i < expr->n_arg; ++i)
434 if (foreach_access_expr(expr->args[i], fn, user) < 0)
435 return -1;
437 return 0;
440 static int inc_n_access(struct pet_expr *expr, void *user)
442 struct ppcg_stmt *stmt = user;
443 stmt->n_access++;
444 return 0;
447 /* Internal data for add_access.
449 * "stmt" is the statement to which an access needs to be added.
450 * "build" is the current AST build.
451 * "map" maps the AST loop iterators to the iteration domain of the statement.
453 struct ppcg_add_access_data {
454 struct ppcg_stmt *stmt;
455 isl_ast_build *build;
456 isl_map *map;
459 /* Given an access expression, add it to data->stmt after
460 * transforming it to refer to the AST loop iterators.
462 static int add_access(struct pet_expr *expr, void *user)
464 int i, n;
465 isl_ctx *ctx;
466 isl_map *access;
467 isl_pw_multi_aff *pma;
468 struct ppcg_add_access_data *data = user;
469 isl_ast_expr_list *index;
471 ctx = isl_map_get_ctx(expr->acc.access);
472 n = isl_map_dim(expr->acc.access, isl_dim_out);
473 access = isl_map_copy(expr->acc.access);
474 access = isl_map_apply_range(isl_map_copy(data->map), access);
475 pma = isl_pw_multi_aff_from_map(access);
476 pma = isl_pw_multi_aff_coalesce(pma);
478 index = isl_ast_expr_list_alloc(ctx, n);
479 for (i = 0; i < n; ++i) {
480 isl_pw_aff *pa;
481 isl_ast_expr *expr;
483 pa = isl_pw_multi_aff_get_pw_aff(pma, i);
484 expr = isl_ast_build_expr_from_pw_aff(data->build, pa);
485 index = isl_ast_expr_list_add(index, expr);
487 isl_pw_multi_aff_free(pma);
489 data->stmt->access[data->stmt->n_access] = index;
490 data->stmt->n_access++;
491 return 0;
494 /* Transform the accesses in the statement associated to the domain
495 * called by "node" to refer to the AST loop iterators,
496 * collect them in a ppcg_stmt and annotate the node with the ppcg_stmt.
498 static __isl_give isl_ast_node *at_each_domain(__isl_take isl_ast_node *node,
499 __isl_keep isl_ast_build *build, void *user)
501 struct ppcg_scop *scop = user;
502 isl_ast_expr *expr, *arg;
503 isl_ctx *ctx;
504 isl_id *id;
505 isl_map *map;
506 struct ppcg_stmt *stmt;
507 struct ppcg_add_access_data data;
509 ctx = isl_ast_node_get_ctx(node);
510 stmt = isl_calloc_type(ctx, struct ppcg_stmt);
511 if (!stmt)
512 goto error;
514 expr = isl_ast_node_user_get_expr(node);
515 arg = isl_ast_expr_get_op_arg(expr, 0);
516 isl_ast_expr_free(expr);
517 id = isl_ast_expr_get_id(arg);
518 isl_ast_expr_free(arg);
519 stmt->stmt = find_stmt(scop, id);
520 isl_id_free(id);
521 if (!stmt->stmt)
522 goto error;
524 stmt->n_access = 0;
525 if (foreach_access_expr(stmt->stmt->body, &inc_n_access, stmt) < 0)
526 goto error;
528 stmt->access = isl_calloc_array(ctx, isl_ast_expr_list *,
529 stmt->n_access);
530 if (!stmt->access)
531 goto error;
533 map = isl_map_from_union_map(isl_ast_build_get_schedule(build));
534 map = isl_map_reverse(map);
536 stmt->n_access = 0;
537 data.stmt = stmt;
538 data.build = build;
539 data.map = map;
540 if (foreach_access_expr(stmt->stmt->body, &add_access, &data) < 0)
541 node = isl_ast_node_free(node);
543 isl_map_free(map);
545 id = isl_id_alloc(isl_ast_node_get_ctx(node), NULL, stmt);
546 id = isl_id_set_free_user(id, &ppcg_stmt_free);
547 return isl_ast_node_set_annotation(node, id);
548 error:
549 ppcg_stmt_free(stmt);
550 return isl_ast_node_free(node);
553 /* Code generate the scop 'scop' and print the corresponding C code to 'p'.
555 static __isl_give isl_printer *print_scop(isl_ctx *ctx, struct ppcg_scop *scop,
556 __isl_take isl_printer *p, struct ppcg_options *options)
558 isl_set *context;
559 isl_union_set *domain_set;
560 isl_union_map *schedule_map;
561 isl_ast_build *build;
562 isl_ast_print_options *print_options;
563 isl_ast_node *tree;
564 struct ast_build_userinfo build_info;
566 context = isl_set_copy(scop->context);
567 domain_set = isl_union_set_copy(scop->domain);
568 schedule_map = isl_union_map_copy(scop->schedule);
569 schedule_map = isl_union_map_intersect_domain(schedule_map, domain_set);
571 build = isl_ast_build_from_context(context);
572 build = isl_ast_build_set_at_each_domain(build, &at_each_domain, scop);
574 if (options->openmp) {
575 build_info.scop = scop;
576 build_info.in_parallel_for = 0;
578 build = isl_ast_build_set_before_each_for(build,
579 &ast_build_before_for,
580 &build_info);
581 build = isl_ast_build_set_after_each_for(build,
582 &ast_build_after_for,
583 &build_info);
586 tree = isl_ast_build_ast_from_schedule(build, schedule_map);
587 isl_ast_build_free(build);
589 print_options = isl_ast_print_options_alloc(ctx);
590 print_options = isl_ast_print_options_set_print_user(print_options,
591 &print_user, NULL);
593 print_options = isl_ast_print_options_set_print_for(print_options,
594 &print_for, NULL);
596 p = isl_ast_node_print_macros(tree, p);
597 p = isl_ast_node_print(tree, p, print_options);
599 isl_ast_node_free(tree);
601 return p;
604 /* Does "scop" refer to any arrays that are declared, but not
605 * exposed to the code after the scop?
607 static int any_hidden_declarations(struct ppcg_scop *scop)
609 int i;
611 if (!scop)
612 return 0;
614 for (i = 0; i < scop->n_array; ++i)
615 if (scop->arrays[i]->declared && !scop->arrays[i]->exposed)
616 return 1;
618 return 0;
621 int generate_cpu(isl_ctx *ctx, struct ppcg_scop *ps,
622 struct ppcg_options *options, const char *input, const char *output)
624 FILE *input_file;
625 FILE *output_file;
626 isl_printer *p;
627 int hidden;
629 if (!ps)
630 return -1;
632 input_file = fopen(input, "r");
633 output_file = get_output_file(input, output);
635 copy(input_file, output_file, 0, ps->start);
636 fprintf(output_file, "/* ppcg generated CPU code */\n\n");
637 p = isl_printer_to_file(ctx, output_file);
638 p = isl_printer_set_output_format(p, ISL_FORMAT_C);
639 p = ppcg_print_exposed_declarations(p, ps);
640 hidden = any_hidden_declarations(ps);
641 if (hidden) {
642 p = ppcg_start_block(p);
643 p = ppcg_print_hidden_declarations(p, ps);
645 p = print_scop(ctx, ps, p, options);
646 if (hidden)
647 p = ppcg_end_block(p);
648 isl_printer_free(p);
649 copy(input_file, output_file, ps->end, -1);
651 fclose(output_file);
652 fclose(input_file);
654 return 0;