README: update latest release of clang
[ppcg.git] / cpu.c
blobc9ce02837bc8cdd19dd1dbcc182d24f992a42c9c
1 /*
2 * Copyright 2012 INRIA Paris-Rocquencourt
4 * Use of this software is governed by the MIT license
6 * Written by Tobias Grosser, INRIA Paris-Rocquencourt,
7 * Domaine de Voluceau, Rocquenqourt, B.P. 105,
8 * 78153 Le Chesnay Cedex France
9 */
11 #include <limits.h>
12 #include <stdio.h>
13 #include <string.h>
15 #include <isl/aff.h>
16 #include <isl/ctx.h>
17 #include <isl/map.h>
18 #include <isl/ast_build.h>
19 #include <pet.h>
21 #include "ppcg.h"
22 #include "ppcg_options.h"
23 #include "cpu.h"
24 #include "print.h"
26 /* Representation of a statement inside a generated AST.
28 * "stmt" refers to the original statement.
29 * "ref2expr" maps the reference identifier of each access in
30 * the statement to an AST expression that should be printed
31 * at the place of the access.
33 struct ppcg_stmt {
34 struct pet_stmt *stmt;
36 isl_id_to_ast_expr *ref2expr;
39 static void ppcg_stmt_free(void *user)
41 struct ppcg_stmt *stmt = user;
42 int i;
44 if (!stmt)
45 return;
47 isl_id_to_ast_expr_free(stmt->ref2expr);
49 free(stmt);
52 /* Derive the output file name from the input file name.
53 * 'input' is the entire path of the input file. The output
54 * is the file name plus the additional extension.
56 * We will basically replace everything after the last point
57 * with '.ppcg.c'. This means file.c becomes file.ppcg.c
59 static FILE *get_output_file(const char *input, const char *output)
61 char name[PATH_MAX];
62 const char *ext;
63 const char ppcg_marker[] = ".ppcg";
64 int len;
65 FILE *file;
67 len = ppcg_extract_base_name(name, input);
69 strcpy(name + len, ppcg_marker);
70 ext = strrchr(input, '.');
71 strcpy(name + len + sizeof(ppcg_marker) - 1, ext ? ext : ".c");
73 if (!output)
74 output = name;
76 file = fopen(output, "w");
77 if (!file) {
78 fprintf(stderr, "Unable to open '%s' for writing\n", output);
79 return NULL;
82 return file;
85 /* Data used to annotate for nodes in the ast.
87 struct ast_node_userinfo {
88 /* The for node is an openmp parallel for node. */
89 int is_openmp;
92 /* Information used while building the ast.
94 struct ast_build_userinfo {
95 /* The current ppcg scop. */
96 struct ppcg_scop *scop;
98 /* Are we currently in a parallel for loop? */
99 int in_parallel_for;
102 /* Check if the current scheduling dimension is parallel.
104 * We check for parallelism by verifying that the loop does not carry any
105 * dependences.
106 * If the live_range_reordering option is set, then this currently
107 * includes the order dependences. In principle, non-zero order dependences
108 * could be allowed, but this would require privatization and/or expansion.
110 * Parallelism test: if the distance is zero in all outer dimensions, then it
111 * has to be zero in the current dimension as well.
112 * Implementation: first, translate dependences into time space, then force
113 * outer dimensions to be equal. If the distance is zero in the current
114 * dimension, then the loop is parallel.
115 * The distance is zero in the current dimension if it is a subset of a map
116 * with equal values for the current dimension.
118 static int ast_schedule_dim_is_parallel(__isl_keep isl_ast_build *build,
119 struct ppcg_scop *scop)
121 isl_union_map *schedule_node, *schedule, *deps;
122 isl_map *schedule_deps, *test;
123 isl_space *schedule_space;
124 unsigned i, dimension, is_parallel;
126 schedule = isl_ast_build_get_schedule(build);
127 schedule_space = isl_ast_build_get_schedule_space(build);
129 dimension = isl_space_dim(schedule_space, isl_dim_out) - 1;
131 deps = isl_union_map_copy(scop->dep_flow);
132 deps = isl_union_map_union(deps, isl_union_map_copy(scop->dep_false));
133 if (scop->options->live_range_reordering) {
134 isl_union_map *order = isl_union_map_copy(scop->dep_order);
135 deps = isl_union_map_union(deps, order);
137 deps = isl_union_map_apply_range(deps, isl_union_map_copy(schedule));
138 deps = isl_union_map_apply_domain(deps, schedule);
140 if (isl_union_map_is_empty(deps)) {
141 isl_union_map_free(deps);
142 isl_space_free(schedule_space);
143 return 1;
146 schedule_deps = isl_map_from_union_map(deps);
148 for (i = 0; i < dimension; i++)
149 schedule_deps = isl_map_equate(schedule_deps, isl_dim_out, i,
150 isl_dim_in, i);
152 test = isl_map_universe(isl_map_get_space(schedule_deps));
153 test = isl_map_equate(test, isl_dim_out, dimension, isl_dim_in,
154 dimension);
155 is_parallel = isl_map_is_subset(schedule_deps, test);
157 isl_space_free(schedule_space);
158 isl_map_free(test);
159 isl_map_free(schedule_deps);
161 return is_parallel;
164 /* Mark a for node openmp parallel, if it is the outermost parallel for node.
166 static void mark_openmp_parallel(__isl_keep isl_ast_build *build,
167 struct ast_build_userinfo *build_info,
168 struct ast_node_userinfo *node_info)
170 if (build_info->in_parallel_for)
171 return;
173 if (ast_schedule_dim_is_parallel(build, build_info->scop)) {
174 build_info->in_parallel_for = 1;
175 node_info->is_openmp = 1;
179 /* Allocate an ast_node_info structure and initialize it with default values.
181 static struct ast_node_userinfo *allocate_ast_node_userinfo()
183 struct ast_node_userinfo *node_info;
184 node_info = (struct ast_node_userinfo *)
185 malloc(sizeof(struct ast_node_userinfo));
186 node_info->is_openmp = 0;
187 return node_info;
190 /* Free an ast_node_info structure.
192 static void free_ast_node_userinfo(void *ptr)
194 struct ast_node_userinfo *info;
195 info = (struct ast_node_userinfo *) ptr;
196 free(info);
199 /* This method is executed before the construction of a for node. It creates
200 * an isl_id that is used to annotate the subsequently generated ast for nodes.
202 * In this function we also run the following analyses:
204 * - Detection of openmp parallel loops
206 static __isl_give isl_id *ast_build_before_for(
207 __isl_keep isl_ast_build *build, void *user)
209 isl_id *id;
210 struct ast_build_userinfo *build_info;
211 struct ast_node_userinfo *node_info;
213 build_info = (struct ast_build_userinfo *) user;
214 node_info = allocate_ast_node_userinfo();
215 id = isl_id_alloc(isl_ast_build_get_ctx(build), "", node_info);
216 id = isl_id_set_free_user(id, free_ast_node_userinfo);
218 mark_openmp_parallel(build, build_info, node_info);
220 return id;
223 /* This method is executed after the construction of a for node.
225 * It performs the following actions:
227 * - Reset the 'in_parallel_for' flag, as soon as we leave a for node,
228 * that is marked as openmp parallel.
231 static __isl_give isl_ast_node *ast_build_after_for(__isl_take isl_ast_node *node,
232 __isl_keep isl_ast_build *build, void *user) {
233 isl_id *id;
234 struct ast_build_userinfo *build_info;
235 struct ast_node_userinfo *info;
237 id = isl_ast_node_get_annotation(node);
238 info = isl_id_get_user(id);
240 if (info && info->is_openmp) {
241 build_info = (struct ast_build_userinfo *) user;
242 build_info->in_parallel_for = 0;
245 isl_id_free(id);
247 return node;
250 /* Find the element in scop->stmts that has the given "id".
252 static struct pet_stmt *find_stmt(struct ppcg_scop *scop, __isl_keep isl_id *id)
254 int i;
256 for (i = 0; i < scop->pet->n_stmt; ++i) {
257 struct pet_stmt *stmt = scop->pet->stmts[i];
258 isl_id *id_i;
260 id_i = isl_set_get_tuple_id(stmt->domain);
261 isl_id_free(id_i);
263 if (id_i == id)
264 return stmt;
267 isl_die(isl_id_get_ctx(id), isl_error_internal,
268 "statement not found", return NULL);
271 /* Print a user statement in the generated AST.
272 * The ppcg_stmt has been attached to the node in at_each_domain.
274 static __isl_give isl_printer *print_user(__isl_take isl_printer *p,
275 __isl_take isl_ast_print_options *print_options,
276 __isl_keep isl_ast_node *node, void *user)
278 struct ppcg_stmt *stmt;
279 isl_id *id;
281 id = isl_ast_node_get_annotation(node);
282 stmt = isl_id_get_user(id);
283 isl_id_free(id);
285 p = pet_stmt_print_body(stmt->stmt, p, stmt->ref2expr);
287 isl_ast_print_options_free(print_options);
289 return p;
293 /* Print a for loop node as an openmp parallel loop.
295 * To print an openmp parallel loop we print a normal for loop, but add
296 * "#pragma openmp parallel for" in front.
298 * Variables that are declared within the body of this for loop are
299 * automatically openmp 'private'. Iterators declared outside of the
300 * for loop are automatically openmp 'shared'. As ppcg declares all iterators
301 * at the position where they are assigned, there is no need to explicitly mark
302 * variables. Their automatically assigned type is already correct.
304 * This function only generates valid OpenMP code, if the ast was generated
305 * with the 'atomic-bounds' option enabled.
308 static __isl_give isl_printer *print_for_with_openmp(
309 __isl_keep isl_ast_node *node, __isl_take isl_printer *p,
310 __isl_take isl_ast_print_options *print_options)
312 p = isl_printer_start_line(p);
313 p = isl_printer_print_str(p, "#pragma omp parallel for");
314 p = isl_printer_end_line(p);
316 p = isl_ast_node_for_print(node, p, print_options);
318 return p;
321 /* Print a for node.
323 * Depending on how the node is annotated, we either print a normal
324 * for node or an openmp parallel for node.
326 static __isl_give isl_printer *print_for(__isl_take isl_printer *p,
327 __isl_take isl_ast_print_options *print_options,
328 __isl_keep isl_ast_node *node, void *user)
330 struct ppcg_print_info *print_info;
331 isl_id *id;
332 int openmp;
334 openmp = 0;
335 id = isl_ast_node_get_annotation(node);
337 if (id) {
338 struct ast_node_userinfo *info;
340 info = (struct ast_node_userinfo *) isl_id_get_user(id);
341 if (info && info->is_openmp)
342 openmp = 1;
345 if (openmp)
346 p = print_for_with_openmp(node, p, print_options);
347 else
348 p = isl_ast_node_for_print(node, p, print_options);
350 isl_id_free(id);
352 return p;
355 /* Index transformation callback for pet_stmt_build_ast_exprs.
357 * "index" expresses the array indices in terms of statement iterators
358 * "iterator_map" expresses the statement iterators in terms of
359 * AST loop iterators.
361 * The result expresses the array indices in terms of
362 * AST loop iterators.
364 static __isl_give isl_multi_pw_aff *pullback_index(
365 __isl_take isl_multi_pw_aff *index, __isl_keep isl_id *id, void *user)
367 isl_pw_multi_aff *iterator_map = user;
369 iterator_map = isl_pw_multi_aff_copy(iterator_map);
370 return isl_multi_pw_aff_pullback_pw_multi_aff(index, iterator_map);
373 /* Transform the accesses in the statement associated to the domain
374 * called by "node" to refer to the AST loop iterators, construct
375 * corresponding AST expressions using "build",
376 * collect them in a ppcg_stmt and annotate the node with the ppcg_stmt.
378 static __isl_give isl_ast_node *at_each_domain(__isl_take isl_ast_node *node,
379 __isl_keep isl_ast_build *build, void *user)
381 struct ppcg_scop *scop = user;
382 isl_ast_expr *expr, *arg;
383 isl_ctx *ctx;
384 isl_id *id;
385 isl_map *map;
386 isl_pw_multi_aff *iterator_map;
387 struct ppcg_stmt *stmt;
389 ctx = isl_ast_node_get_ctx(node);
390 stmt = isl_calloc_type(ctx, struct ppcg_stmt);
391 if (!stmt)
392 goto error;
394 expr = isl_ast_node_user_get_expr(node);
395 arg = isl_ast_expr_get_op_arg(expr, 0);
396 isl_ast_expr_free(expr);
397 id = isl_ast_expr_get_id(arg);
398 isl_ast_expr_free(arg);
399 stmt->stmt = find_stmt(scop, id);
400 isl_id_free(id);
401 if (!stmt->stmt)
402 goto error;
404 map = isl_map_from_union_map(isl_ast_build_get_schedule(build));
405 map = isl_map_reverse(map);
406 iterator_map = isl_pw_multi_aff_from_map(map);
407 stmt->ref2expr = pet_stmt_build_ast_exprs(stmt->stmt, build,
408 &pullback_index, iterator_map, NULL, NULL);
409 isl_pw_multi_aff_free(iterator_map);
411 id = isl_id_alloc(isl_ast_node_get_ctx(node), NULL, stmt);
412 id = isl_id_set_free_user(id, &ppcg_stmt_free);
413 return isl_ast_node_set_annotation(node, id);
414 error:
415 ppcg_stmt_free(stmt);
416 return isl_ast_node_free(node);
419 /* Set *depth to the number of scheduling dimensions
420 * for the schedule of the first domain.
421 * We assume here that this number is the same for all domains.
423 static isl_stat set_depth(__isl_take isl_map *map, void *user)
425 unsigned *depth = user;
427 *depth = isl_map_dim(map, isl_dim_out);
429 isl_map_free(map);
430 return isl_stat_error;
433 /* Code generate the scop 'scop' and print the corresponding C code to 'p'.
435 static __isl_give isl_printer *print_scop(struct ppcg_scop *scop,
436 __isl_take isl_printer *p, struct ppcg_options *options)
438 isl_ctx *ctx = isl_printer_get_ctx(p);
439 isl_set *context;
440 isl_union_set *domain_set;
441 isl_union_map *schedule_map;
442 isl_ast_build *build;
443 isl_ast_print_options *print_options;
444 isl_ast_node *tree;
445 isl_id_list *iterators;
446 struct ast_build_userinfo build_info;
447 int depth;
449 context = isl_set_copy(scop->context);
450 domain_set = isl_union_set_copy(scop->domain);
451 schedule_map = isl_schedule_get_map(scop->schedule);
452 schedule_map = isl_union_map_intersect_domain(schedule_map, domain_set);
454 isl_union_map_foreach_map(schedule_map, &set_depth, &depth);
456 build = isl_ast_build_from_context(context);
457 iterators = ppcg_scop_generate_names(scop, depth, "c");
458 build = isl_ast_build_set_iterators(build, iterators);
459 build = isl_ast_build_set_at_each_domain(build, &at_each_domain, scop);
461 if (options->openmp) {
462 build_info.scop = scop;
463 build_info.in_parallel_for = 0;
465 build = isl_ast_build_set_before_each_for(build,
466 &ast_build_before_for,
467 &build_info);
468 build = isl_ast_build_set_after_each_for(build,
469 &ast_build_after_for,
470 &build_info);
473 tree = isl_ast_build_node_from_schedule_map(build, schedule_map);
474 isl_ast_build_free(build);
476 print_options = isl_ast_print_options_alloc(ctx);
477 print_options = isl_ast_print_options_set_print_user(print_options,
478 &print_user, NULL);
480 print_options = isl_ast_print_options_set_print_for(print_options,
481 &print_for, NULL);
483 p = ppcg_print_macros(p, tree);
484 p = isl_ast_node_print(tree, p, print_options);
486 isl_ast_node_free(tree);
488 return p;
491 /* Generate CPU code for the scop "ps" and print the corresponding C code
492 * to "p", including variable declarations.
494 __isl_give isl_printer *print_cpu(__isl_take isl_printer *p,
495 struct ppcg_scop *ps, struct ppcg_options *options)
497 int hidden;
499 p = isl_printer_start_line(p);
500 p = isl_printer_print_str(p, "/* ppcg generated CPU code */");
501 p = isl_printer_end_line(p);
503 p = isl_printer_start_line(p);
504 p = isl_printer_end_line(p);
506 p = isl_ast_op_type_print_macro(isl_ast_op_fdiv_q, p);
507 p = ppcg_print_exposed_declarations(p, ps);
508 hidden = ppcg_scop_any_hidden_declarations(ps);
509 if (hidden) {
510 p = ppcg_start_block(p);
511 p = ppcg_print_hidden_declarations(p, ps);
513 if (options->debug->dump_final_schedule)
514 isl_schedule_dump(ps->schedule);
515 p = print_scop(ps, p, options);
516 if (hidden)
517 p = ppcg_end_block(p);
519 return p;
522 /* Wrapper around print_cpu for use as a ppcg_transform callback.
524 static __isl_give isl_printer *print_cpu_wrap(__isl_take isl_printer *p,
525 struct ppcg_scop *scop, void *user)
527 struct ppcg_options *options = user;
529 return print_cpu(p, scop, options);
532 /* Transform the code in the file called "input" by replacing
533 * all scops by corresponding CPU code and write the results to a file
534 * called "output".
536 int generate_cpu(isl_ctx *ctx, struct ppcg_options *options,
537 const char *input, const char *output)
539 FILE *output_file;
540 int r;
542 output_file = get_output_file(input, output);
543 if (!output_file)
544 return -1;
546 r = ppcg_transform(ctx, input, output_file, options,
547 &print_cpu_wrap, options);
549 fclose(output_file);
551 return r;