/*
 * Copyright 2012 INRIA Paris-Rocquencourt
 *
 * Use of this software is governed by the MIT license
 *
 * Written by Tobias Grosser, INRIA Paris-Rocquencourt,
 * Domaine de Voluceau, Rocquencourt, B.P. 105,
 * 78153 Le Chesnay Cedex France
 */

#include <limits.h>
#include <stdio.h>
#include <string.h>

#include <isl/aff.h>
#include <isl/ctx.h>
#include <isl/map.h>
#include <isl/ast_build.h>
#include <pet.h>

#include "ppcg.h"
#include "ppcg_options.h"
#include "cpu.h"
#include "print.h"

/* Representation of a statement inside a generated AST.
 *
 * "stmt" refers to the original statement.
 * "ref2expr" maps the reference identifier of each access in
 * the statement to an AST expression that should be printed
 * at the place of the access.
 */
struct ppcg_stmt {
	struct pet_stmt *stmt;

	isl_id_to_ast_expr *ref2expr;
};

static void ppcg_stmt_free(void *user)
{
	struct ppcg_stmt *stmt = user;

	if (!stmt)
		return;

	isl_id_to_ast_expr_free(stmt->ref2expr);

	free(stmt);
}

/* Derive the output file name from the input file name.
 * 'input' is the entire path of the input file.  The output name
 * is the base name of the input file with '.ppcg' inserted before
 * the extension.
 *
 * That is, file.c becomes file.ppcg.c.  If the input file has no
 * extension, '.ppcg.c' is appended instead.
 * If 'output' is non-NULL, it is used as the output file name as is.
 */
static FILE *get_output_file(const char *input, const char *output)
{
	char name[PATH_MAX];
	const char *ext;
	const char ppcg_marker[] = ".ppcg";
	int len;

	len = ppcg_extract_base_name(name, input);

	strcpy(name + len, ppcg_marker);
	ext = strrchr(input, '.');
	strcpy(name + len + sizeof(ppcg_marker) - 1, ext ? ext : ".c");

	if (!output)
		output = name;

	return fopen(output, "w");
}

/* Data used to annotate "for" nodes in the AST.
 */
struct ast_node_userinfo {
	/* The for node is an openmp parallel for node. */
	int is_openmp;
};

/* Information used while building the AST.
 */
struct ast_build_userinfo {
	/* The current ppcg scop. */
	struct ppcg_scop *scop;

	/* Are we currently in a parallel for loop? */
	int in_parallel_for;
};

/* Check if the current scheduling dimension is parallel.
 *
 * We check for parallelism by verifying that the loop does not carry any
 * dependences.
 * If the live_range_reordering option is set, then this currently
 * includes the order dependences.  In principle, non-zero order dependences
 * could be allowed, but this would require privatization and/or expansion.
 *
 * Parallelism test: if the distance is zero in all outer dimensions, then it
 * has to be zero in the current dimension as well.
 * Implementation: first, translate dependences into time space, then force
 * outer dimensions to be equal.  If the distance is zero in the current
 * dimension, then the loop is parallel.
 * The distance is zero in the current dimension if it is a subset of a map
 * with equal values for the current dimension.
 */
static int ast_schedule_dim_is_parallel(__isl_keep isl_ast_build *build,
	struct ppcg_scop *scop)
{
	isl_union_map *schedule, *deps;
	isl_map *schedule_deps, *test;
	isl_space *schedule_space;
	unsigned i, dimension;
	int is_parallel;

	schedule = isl_ast_build_get_schedule(build);
	schedule_space = isl_ast_build_get_schedule_space(build);

	dimension = isl_space_dim(schedule_space, isl_dim_out) - 1;

	deps = isl_union_map_copy(scop->dep_flow);
	deps = isl_union_map_union(deps, isl_union_map_copy(scop->dep_false));
	if (scop->options->live_range_reordering) {
		isl_union_map *order = isl_union_map_copy(scop->dep_order);
		deps = isl_union_map_union(deps, order);
	}
	deps = isl_union_map_apply_range(deps, isl_union_map_copy(schedule));
	deps = isl_union_map_apply_domain(deps, schedule);

	if (isl_union_map_is_empty(deps)) {
		isl_union_map_free(deps);
		isl_space_free(schedule_space);
		return 1;
	}

	schedule_deps = isl_map_from_union_map(deps);

	for (i = 0; i < dimension; i++)
		schedule_deps = isl_map_equate(schedule_deps, isl_dim_out, i,
						isl_dim_in, i);

	test = isl_map_universe(isl_map_get_space(schedule_deps));
	test = isl_map_equate(test, isl_dim_out, dimension, isl_dim_in,
				dimension);
	is_parallel = isl_map_is_subset(schedule_deps, test);

	isl_space_free(schedule_space);
	isl_map_free(test);
	isl_map_free(schedule_deps);

	return is_parallel;
}
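
/* As an illustration (hypothetical statement, schedule and dependence):
 * with the identity schedule { S[i,j] -> [i,j] } and the flow dependence
 * { S[i,j] -> S[i,j+1] }, the dependence distance is (0,1).
 * The outer dimension satisfies the subset test above (the first schedule
 * coordinates of source and sink are always equal), so the outer loop is
 * reported as parallel.  For the inner dimension, equating the outer
 * dimension still leaves unequal inner coordinates, so the inner loop
 * is not parallel.
 */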

/* Mark a for node as openmp parallel if it is the outermost parallel
 * for node.
 */
static void mark_openmp_parallel(__isl_keep isl_ast_build *build,
	struct ast_build_userinfo *build_info,
	struct ast_node_userinfo *node_info)
{
	if (build_info->in_parallel_for)
		return;

	if (ast_schedule_dim_is_parallel(build, build_info->scop)) {
		build_info->in_parallel_for = 1;
		node_info->is_openmp = 1;
	}
}

/* Allocate an ast_node_userinfo structure and initialize it with default
 * values.
 */
static struct ast_node_userinfo *allocate_ast_node_userinfo(void)
{
	struct ast_node_userinfo *node_info;
	node_info = (struct ast_node_userinfo *)
		malloc(sizeof(struct ast_node_userinfo));
	node_info->is_openmp = 0;
	return node_info;
}

/* Free an ast_node_userinfo structure.
 */
static void free_ast_node_userinfo(void *ptr)
{
	struct ast_node_userinfo *info;
	info = (struct ast_node_userinfo *) ptr;
	free(info);
}

/* This function is executed before the construction of a for node.
 * It creates an isl_id that is used to annotate the subsequently
 * generated AST for node.
 *
 * In this function, we also run the following analyses:
 *
 *	- Detection of openmp parallel loops
 */
static __isl_give isl_id *ast_build_before_for(
	__isl_keep isl_ast_build *build, void *user)
{
	isl_id *id;
	struct ast_build_userinfo *build_info;
	struct ast_node_userinfo *node_info;

	build_info = (struct ast_build_userinfo *) user;
	node_info = allocate_ast_node_userinfo();
	id = isl_id_alloc(isl_ast_build_get_ctx(build), "", node_info);
	id = isl_id_set_free_user(id, free_ast_node_userinfo);

	mark_openmp_parallel(build, build_info, node_info);

	return id;
}

/* This function is executed after the construction of a for node.
 *
 * It performs the following actions:
 *
 *	- Reset the 'in_parallel_for' flag as soon as we leave a for node
 *	  that is marked as openmp parallel.
 */
static __isl_give isl_ast_node *ast_build_after_for(
	__isl_take isl_ast_node *node, __isl_keep isl_ast_build *build,
	void *user)
{
	isl_id *id;
	struct ast_build_userinfo *build_info;
	struct ast_node_userinfo *info;

	id = isl_ast_node_get_annotation(node);
	info = isl_id_get_user(id);

	if (info && info->is_openmp) {
		build_info = (struct ast_build_userinfo *) user;
		build_info->in_parallel_for = 0;
	}

	isl_id_free(id);

	return node;
}

/* Find the element in scop->pet->stmts that has the given "id".
 */
static struct pet_stmt *find_stmt(struct ppcg_scop *scop, __isl_keep isl_id *id)
{
	int i;

	for (i = 0; i < scop->pet->n_stmt; ++i) {
		struct pet_stmt *stmt = scop->pet->stmts[i];
		isl_id *id_i;

		/* Release the reference to id_i right away; the pointer
		 * itself is only used for an identity comparison. */
		id_i = isl_set_get_tuple_id(stmt->domain);
		isl_id_free(id_i);

		if (id_i == id)
			return stmt;
	}

	isl_die(isl_id_get_ctx(id), isl_error_internal,
		"statement not found", return NULL);
}

/* Print a user statement in the generated AST.
 * The ppcg_stmt has been attached to the node in at_each_domain.
 */
static __isl_give isl_printer *print_user(__isl_take isl_printer *p,
	__isl_take isl_ast_print_options *print_options,
	__isl_keep isl_ast_node *node, void *user)
{
	struct ppcg_stmt *stmt;
	isl_id *id;

	id = isl_ast_node_get_annotation(node);
	stmt = isl_id_get_user(id);
	isl_id_free(id);

	p = pet_stmt_print_body(stmt->stmt, p, stmt->ref2expr);

	isl_ast_print_options_free(print_options);

	return p;
}

/* Print a for loop node as an openmp parallel loop.
 *
 * To print an openmp parallel loop we print a normal for loop, but add
 * "#pragma omp parallel for" in front.
 *
 * Variables that are declared within the body of this for loop are
 * automatically openmp 'private'.  Iterators declared outside of the
 * for loop are automatically openmp 'shared'.  As ppcg declares all iterators
 * at the position where they are assigned, there is no need to explicitly mark
 * variables.  Their automatically assigned type is already correct.
 *
 * This function only generates valid OpenMP code if the AST was generated
 * with the 'atomic-bounds' option enabled.
 */
static __isl_give isl_printer *print_for_with_openmp(
	__isl_keep isl_ast_node *node, __isl_take isl_printer *p,
	__isl_take isl_ast_print_options *print_options)
{
	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, "#pragma omp parallel for");
	p = isl_printer_end_line(p);

	p = isl_ast_node_for_print(node, p, print_options);

	return p;
}
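
/* The output then takes roughly the following shape (hypothetical loop
 * bounds and body, shown for illustration only):
 *
 *	#pragma omp parallel for
 *	for (int c0 = 0; c0 < n; c0 += 1)
 *		A[c0] = f(c0);
 */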

/* Print a for node.
 *
 * Depending on how the node is annotated, we either print a normal
 * for node or an openmp parallel for node.
 */
static __isl_give isl_printer *print_for(__isl_take isl_printer *p,
	__isl_take isl_ast_print_options *print_options,
	__isl_keep isl_ast_node *node, void *user)
{
	isl_id *id;
	int openmp;

	openmp = 0;
	id = isl_ast_node_get_annotation(node);

	if (id) {
		struct ast_node_userinfo *info;

		info = (struct ast_node_userinfo *) isl_id_get_user(id);
		if (info && info->is_openmp)
			openmp = 1;
	}

	if (openmp)
		p = print_for_with_openmp(node, p, print_options);
	else
		p = isl_ast_node_for_print(node, p, print_options);

	isl_id_free(id);

	return p;
}

/* Index transformation callback for pet_stmt_build_ast_exprs.
 *
 * "index" expresses the array indices in terms of the statement iterators.
 * "iterator_map" expresses the statement iterators in terms of the
 * AST loop iterators.
 *
 * The result expresses the array indices in terms of the
 * AST loop iterators.
 */
static __isl_give isl_multi_pw_aff *pullback_index(
	__isl_take isl_multi_pw_aff *index, __isl_keep isl_id *id, void *user)
{
	isl_pw_multi_aff *iterator_map = user;

	iterator_map = isl_pw_multi_aff_copy(iterator_map);
	return isl_multi_pw_aff_pullback_pw_multi_aff(index, iterator_map);
}
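
/* For example (hypothetical spaces, for illustration only): pulling back
 * the index expression { S[i,j] -> A[i + 1] } along the iterator map
 * { [c0, c1] -> S[c0, c1] } yields { [c0, c1] -> A[c0 + 1] }, i.e.,
 * the access is now expressed in terms of the AST iterators c0 and c1.
 */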

/* Transform the accesses in the statement corresponding to the domain
 * called by "node" to refer to the AST loop iterators, construct
 * corresponding AST expressions using "build",
 * collect them in a ppcg_stmt and annotate the node with the ppcg_stmt.
 */
static __isl_give isl_ast_node *at_each_domain(__isl_take isl_ast_node *node,
	__isl_keep isl_ast_build *build, void *user)
{
	struct ppcg_scop *scop = user;
	isl_ast_expr *expr, *arg;
	isl_ctx *ctx;
	isl_id *id;
	isl_map *map;
	isl_pw_multi_aff *iterator_map;
	struct ppcg_stmt *stmt;

	ctx = isl_ast_node_get_ctx(node);
	stmt = isl_calloc_type(ctx, struct ppcg_stmt);
	if (!stmt)
		goto error;

	expr = isl_ast_node_user_get_expr(node);
	arg = isl_ast_expr_get_op_arg(expr, 0);
	isl_ast_expr_free(expr);
	id = isl_ast_expr_get_id(arg);
	isl_ast_expr_free(arg);
	stmt->stmt = find_stmt(scop, id);
	isl_id_free(id);
	if (!stmt->stmt)
		goto error;

	map = isl_map_from_union_map(isl_ast_build_get_schedule(build));
	map = isl_map_reverse(map);
	iterator_map = isl_pw_multi_aff_from_map(map);
	stmt->ref2expr = pet_stmt_build_ast_exprs(stmt->stmt, build,
				&pullback_index, iterator_map, NULL, NULL);
	isl_pw_multi_aff_free(iterator_map);

	id = isl_id_alloc(isl_ast_node_get_ctx(node), NULL, stmt);
	id = isl_id_set_free_user(id, &ppcg_stmt_free);
	return isl_ast_node_set_annotation(node, id);
error:
	ppcg_stmt_free(stmt);
	return isl_ast_node_free(node);
}

/* Set *depth to the number of scheduling dimensions
 * for the schedule of the first domain.
 * We assume here that this number is the same for all domains.
 */
static int set_depth(__isl_take isl_map *map, void *user)
{
	unsigned *depth = user;

	*depth = isl_map_dim(map, isl_dim_out);

	isl_map_free(map);
	/* Return a negative value to stop the enumeration after the
	 * first map. */
	return -1;
}

/* Code generate the scop 'scop' and print the corresponding C code to 'p'.
 */
static __isl_give isl_printer *print_scop(struct ppcg_scop *scop,
	__isl_take isl_printer *p, struct ppcg_options *options)
{
	isl_ctx *ctx = isl_printer_get_ctx(p);
	isl_set *context;
	isl_union_set *domain_set;
	isl_union_map *schedule_map;
	isl_ast_build *build;
	isl_ast_print_options *print_options;
	isl_ast_node *tree;
	isl_id_list *iterators;
	struct ast_build_userinfo build_info;
	int depth;

	context = isl_set_copy(scop->context);
	domain_set = isl_union_set_copy(scop->domain);
	schedule_map = isl_schedule_get_map(scop->schedule);
	schedule_map = isl_union_map_intersect_domain(schedule_map, domain_set);

	isl_union_map_foreach_map(schedule_map, &set_depth, &depth);

	build = isl_ast_build_from_context(context);
	iterators = ppcg_scop_generate_names(scop, depth, "c");
	build = isl_ast_build_set_iterators(build, iterators);
	build = isl_ast_build_set_at_each_domain(build, &at_each_domain, scop);

	if (options->openmp) {
		/* Detect the outermost parallel for loops while the AST is
		 * being built, so that print_for can prefix them with an
		 * OpenMP pragma.
		 */
		build_info.scop = scop;
		build_info.in_parallel_for = 0;

		build = isl_ast_build_set_before_each_for(build,
						&ast_build_before_for,
						&build_info);
		build = isl_ast_build_set_after_each_for(build,
						&ast_build_after_for,
						&build_info);
	}

	tree = isl_ast_build_node_from_schedule_map(build, schedule_map);
	isl_ast_build_free(build);

	print_options = isl_ast_print_options_alloc(ctx);
	print_options = isl_ast_print_options_set_print_user(print_options,
						&print_user, NULL);
	print_options = isl_ast_print_options_set_print_for(print_options,
						&print_for, NULL);

	p = isl_ast_node_print_macros(tree, p);
	p = isl_ast_node_print(tree, p, print_options);

	isl_ast_node_free(tree);

	return p;
}

/* Does "scop" refer to any arrays that are declared, but not
 * exposed to the code after the scop?
 */
static int any_hidden_declarations(struct ppcg_scop *scop)
{
	int i;

	if (!scop)
		return 0;

	for (i = 0; i < scop->pet->n_array; ++i)
		if (scop->pet->arrays[i]->declared &&
		    !scop->pet->arrays[i]->exposed)
			return 1;

	return 0;
}

/* Generate CPU code for the scop "ps" and print the corresponding C code
 * to "p", including variable declarations.
 */
__isl_give isl_printer *print_cpu(__isl_take isl_printer *p,
	struct ppcg_scop *ps, struct ppcg_options *options)
{
	int hidden;

	p = isl_printer_start_line(p);
	p = isl_printer_print_str(p, "/* ppcg generated CPU code */");
	p = isl_printer_end_line(p);

	p = isl_printer_start_line(p);
	p = isl_printer_end_line(p);

	p = ppcg_print_exposed_declarations(p, ps);
	hidden = any_hidden_declarations(ps);
	if (hidden) {
		p = ppcg_start_block(p);
		p = ppcg_print_hidden_declarations(p, ps);
	}
	if (options->debug->dump_final_schedule)
		isl_schedule_dump(ps->schedule);
	p = print_scop(ps, p, options);
	if (hidden)
		p = ppcg_end_block(p);

	return p;
}

/* Wrapper around print_cpu for use as a ppcg_transform callback.
 */
static __isl_give isl_printer *print_cpu_wrap(__isl_take isl_printer *p,
	struct ppcg_scop *scop, void *user)
{
	struct ppcg_options *options = user;

	return print_cpu(p, scop, options);
}

/* Transform the code in the file called "input" by replacing
 * all scops by corresponding CPU code and write the results to a file
 * called "output".
 */
int generate_cpu(isl_ctx *ctx, struct ppcg_options *options,
	const char *input, const char *output)
{
	FILE *output_file;
	int r;

	output_file = get_output_file(input, output);
	if (!output_file)
		return -1;

	r = ppcg_transform(ctx, input, output_file, options,
				&print_cpu_wrap, options);

	fclose(output_file);

	return r;
}
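
/* Illustrative use (hypothetical driver snippet; in ppcg this entry point
 * is invoked from the main driver once the options have been parsed):
 *
 *	r = generate_cpu(ctx, options, "input.c", NULL);
 */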