doc/clan.texi

   1 \input texinfo
   2 @c %
   3 @c %  /**-----------------------------------------------------------------**
   4 @c %   **                           OpenScop Library                      **
   5 @c %   **-----------------------------------------------------------------**
   6 @c %   **                            openscop.texi                        **
   7 @c %   **-----------------------------------------------------------------**
   8 @c %   **                 First version: september 10th 2006              **
   9 @c %   **-----------------------------------------------------------------**/
  10 @c %
  11 @c % release 0.0: May 4th 2008
  12 @c %
  13
  14 @c % /*************************************************************************
  15 @c %  *                              PART I: HEADER                           *
  16 @c %  *************************************************************************/
  17 @c %**start of header
  18 @setfilename openscop.info
  19 @settitle OpenScop - File Format and Data Structures for Polyhedral Compilation Tools
  20
  21 @set EDITION 1.0
  22 @set SPEC_VERSION 1.0
  23 @set LIB_VERSION 1.0
  24 @set UPDATED November 13th 2010
  25 @c @set UPDATED Coming soon
  26 @setchapternewpage odd
  27
  28 @c % This is to ask for A4 instead of Letter size document.
  29 @iftex
  30      @afourpaper
  31 @end iftex
  32
  33 @c %**end of header
  34
  35 @c % /************************************************************************
  36 @c %  *                PART II: SUMMARY DESCRIPTION AND COPYRIGHT            *
  37 @c %  ************************************************************************/
  38
  39 @copying
  40 This document describes OpenScop, a specification of a file format and a set
  41 of data structures for polyhedral compilation tools to be able to talk
  42 together. It also describes the OpenScop Library version @value{LIB_VERSION},
  43 a Free Software that provides an example of OpenScop implementation.
  44
  45 It would be quite kind to refer the following paper in any publication that
  46 results from the use of the OpenScop library (the reason to cite this paper is,
  47 amongst many other interesting thing, that it defines what is a @emph{SCoP},
  48 or @emph{static control part}):
  49
  50 @example
  51 @@InProceedings@{Bas03,
  52 @ @ author =@ @ @ @ @{C\'edric Bastoul and Albert Cohen and Sylvain Girbal and
  53 @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ Saurabh Sharma and Olivier Temam@},
  54 @ @ title =@ @ @ @ @ @{Putting Polyhedral Loop Transformations to Work@},
  55 @ @ booktitle =@ @{LCPC'16 International Workshop on Languages and
  56 @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ Compilers for Parallel Computers, LNCS 2958@},
  57 @ @ pages =@ @ @ @ @ @{209--225@},
  58 @ @ month =@ @ @ @ @ @{october@},
  59 @ @ year =@ @ @ @ @ @ 2003,
  60 @ @ address =@ @ @ @{College Station, Texas@}
  61 @}
  62 @end example
  63
  64 Copyright @copyright{} 2010 C@'edric Bastoul.
  65
  66 @c quotation
  67 Permission is granted to copy, distribute and/or modify this document under
  68 the terms of the GNU Free Documentation License, Version 1.2
  69 published by the Free Software Foundation. To receive a copy of the
  70 GNU Free Documentation License, write to the Free Software Foundation, Inc.,
  71 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA.
  72 @c end quotation
  73 @end copying
  74
  75 @c % /*************************************************************************
  76 @c %  *                 PART III: TITLEPAGE, CONTENTS, COPYRIGHT              *
  77 @c %  *************************************************************************/
  78 @titlepage
  79 @title OpenScop
  80 @subtitle File Format and Data Structures for Data Exchange in Polyhedral Compilation Tools
  81 @subtitle Edition @value{EDITION}, for OpenScop Specification @value{SPEC_VERSION} and OpenScop Library @value{LIB_VERSION}
  82 @subtitle @value{UPDATED}
  83 @author C@'edric Bastoul
  84
  85 @c The following two commands start the copyright page.
  86 @page
  87 @vskip 0pt plus 1filll
  88 @insertcopying
  89 @end titlepage
  90
  91 @c Output the table of contents at the beginning.
  92 @contents
  93
  94 @c % /*************************************************************************
  95 @c %  *                     PART IV: TOP NODE AND MASTER MENU                 *
  96 @c %  *************************************************************************/
  97 @ifnottex
  98 @node Top
  99 @top OpenSCop
 100
 101 @insertcopying
 102 @end ifnottex
 103
 104 @menu
 105 * Introduction::
 106 * Polyhedral Representation::
 107 * OpenScop File Format Specification::
 108 * Clan Library::
 109 * Installing::
 110 * Documentation::
 111 * References::
 112 @end menu
 113
 114
 115
 116 @c % /*************************************************************************
 117 @c %  *                       PART V: BODY OF THE DOCUMENT                    *
 118 @c %  *************************************************************************/
 119
 120 @c %  ****************************** INTRODUCTION ******************************
 121 @node Introduction
 122 @chapter Introduction
 123 OpenScop is an open specification that defines a file format and a set of
 124 data structures to represent a @emph{static control part} (SCoP for short),
 125 i.e., a program part that can be represented in the @emph{polyhedral model}.
 126 The goal of OpenScop is to provide a common interface to the different
 127 polyhedral compilation tools in order to simplify their interaction.
 128
 129 Designing a single format for tools that have different purposes
 130 (e.g., as different as code generation and data dependence analysis) may
 131 sound strange at first. However we could observe that most available
 132 polyhedral compilation tool during the last decade were manipulating
 133 more or less the same kind of data (polyhedra, affine functions...) and
 134 were actually sharing a part of their input (e.g., iteration domains and
 135 context concepts are nearly everywhere). We could also observe that
 136 those tools may rely on different internal representations, mostly based on
 137 one of the major polyhedral libraries (e.g., Polylib, PPL or isl), and
 138 this representation may change over time (e.g., when switching to a
 139 more convenient polyhedral library).
 140 The OpenScop aim is to provide a stable, unified format that offers a
 141 durable guarantee that a tool can use an output or provide an input to
 142 another tool without breaking a tool chain because of slight internal
 143 changes. The other promise of OpenScop is the ablility to assemble or
 144 replace the basic blocs of a polyhedral compilation framework at no, or
 145 at least low engineering cost.
 146
 147 The policy that drives OpenScop can be summarized by these three main rules:
 148 @itemize @bullet
 149 @item  Embbed the @emph{minimum} information to build a complete polyhedral
 150        compilation framework in the core part (to avoid as much as possible
 151        empty or useless fields for each tool).
 152 @item  Provide a @emph{very stable} core part (including powerful enough
 153        objects to support state-of-the-art techniques, to avoid
 154        spending time to maintain an OpenScop input or output),
 155 @item  Provide a @emph{very flexible} extension part (so OpenScop can also
 156        be used to test wild new ideas).
 157 @end itemize
 158
 159 The success of OpenScop in meeting its goals totally depend on its
 160 acceptance by the tool developpers (that have to support it in their tools).
 161 To help them, we provide an example implementation: the OpenScop Library.
 162 This library (and in particular its API) is not part of the OpenScop
 163 specification (which includes only the file format and the set of data
 164 structures). It is licensed under the 3-clause BSD license so developpers may
 165 feel free to use it in their code (either by linking it or copy-pasting its
 166 code). This document also describes this library in details.
 167 The current version of the OpenScop Library is still under evaluation,
 168 and there is no guarantee that the upward compatibility will be respected,
 169 even if we do think so. A lot of reports are
 170 necessary to freeze the library API. Thus you are very welcome and
 171 encouraged to send reports on bugs, wishes, critics, comments, suggestions
 172 or (please !) successful experiences to cedric.bastoul@@u-psud.fr.
 173
 174 This document is organized as follows. First, we provide some
 175 background on the polyhedral model and how it is used to represent and to
 176 manipulate programs (@pxref{Polyhedral Representation}). Next,
 177 we describe the OpenScop specification, from the file format
 178 (@pxref{OpenScop File Format Specification}) to the data structures and
 179 details of the OpenScop Library API
 180 (@pxref{OpenScop Data Structure Specification}).
 181 Finally we will detail how to install the OpenScop Library
 182 (@pxref{Installing}).
 183
 184
 185 @c %  ******************* POLYHEDRAL REPRESENTATION OF PROGRAMS ****************
 186 @node Polyhedral Representation
 187 @chapter Polyhedral Representation of Programs
 188 If you are reading at OpenScop's documentation, you probably don't need any
 189 explanation about the Polyhedral Model. It is unlikely someone will read this
 190 paper by chance. However some vicious advisor may ask their poor
 191 engineers/interns/students
 192 to work for the very first time on this exciting topic. Most papers on
 193 polyhedral compilation are hard to read. Despite my efforts,
 194 mine are no exception according to some reviewers... Hence I give there a new
 195 try to provide a comprehensive explanation of the polyhedral model without the
 196 size and style limits of a classical research paper.
 197
 198 Be aware that to be able to understand the polyhedral model, there are few
 199 prerequisites. You should not read the following while you still ignore
 200 what is:
 201 @itemize @bullet
 202 @item  a @code{for} loop construction in C programs (@code{do} loops in FORTRAN are OK too !),
 203 @item  an @emph{affine expression},
 204 @item  a @emph{vector},
 205 @item  a @emph{matrix},
 206 @item  a @emph{matrix-vector multiply}.
 207 @end itemize
 208 If you do not know those concepts, please do some search and come back
 209 afterwards. And if you are courageous enough, send me a few lines that
 210 describe them so I can insert that here !
 211
 212 @menu
 213 * Motivation::
 214 * Thinking in Polyhedra::
 215 @end menu
 216
 217
 218 @node Motivation
 219 @section Motivation: Program Transformations
 220
 221 A direct translation of high level programs written, e.g., in C, to assembly
 222 then to object code is likely to produce (very) inefficient applications.
 223 Architectures are
 224 quite complex, including several levels of cache memory, many cores, deep
 225 pipelines, various number of functionnal units, of registers etc.
 226 The list of such
 227 "architectural features" is growing with each new generation of processors.
 228 To achieve the best performance, the object program must do a smart use
 229 of these features.
 230 Programmers use high level languages for productivity and portability:
 231 typically they do not have to take care of the target architecture but
 232 to ensure they do write programs that produce the right output. Hence,
 233 the problem of mapping the program to the target architecture in the most
 234 efficient way is left to the compiler.
 235
 236 The compiler may see a high level program as a specification
 237 @emph{of an output}. The program is a list of operations to be executed to
 238 produce the output. As long as the output is guaranteed to be as the
 239 programmer specified in his code, the compiler is free to modify
 240 the program.
 241 For instance, let us imagine we are working on an architecture with only
 242 three registers and we consider the following statements written by
 243 a programmer:
 244
 245 @example
 246 @group
 247 x = a + b;
 248 y = c + d;
 249 z = a * b;
 250 @end group
 251 @end example
 252
 253 It is easy to see that we can reorder the three statements in any way without
 254 modifying the semantics (no statement reads or writes a variable that another
 255 statement writes). Because of the lack of registers, the solutions such that
 256 the first and the third statements are one after the other are better
 257 because @code{a} and @code{b} will be put in the processor registers by
 258 one statement and can be reused directly by the other one
 259 without reading to memory (this is called a @emph{data locality
 260 improving} transformation). Hence a better statement order is, e.g.:
 261
 262 @example
 263 @group
 264 x = a + b;
 265 z = a * b; // a and b are still in processor registers
 266 y = c + d;
 267 @end group
 268 @end example
 269
 270 We could also notice that it is possible to run the three statements in
 271 parallel and explicit this in the way the compiler and/or the architecture is
 272 able to understand. Here we use OpenMP to describe parallelism
 273 (this is called a @emph{parallelizing} transformation):
 274
 275 @example
 276 @group
 277 #pragma omp parallel sections
 278 @{
 279    #pragma omp section
 280    @{
 281      x = a + b;
 282    @}
 283    #pragma omp section
 284    @{
 285      y = c + d;
 286    @}
 287    #pragma omp section
 288    @{
 289      z = a * b;
 290    @}
 291 @}
 292 @end group
 293 @end example
 294
 295 However, the right way to optimize this program is probably a mix of these
 296 two techniques, especially if the target architecture have some limitations
 297 to run too many operations in parallel:
 298
 299 @example
 300 @group
 301 #pragma omp parallel sections
 302 @{
 303    #pragma omp section
 304    @{
 305      x = a + b;
 306      z = a * b;
 307    @}
 308    #pragma omp section
 309    @{
 310      y = c + d;
 311    @}
 312 @}
 313 @end group
 314 @end example
 315
 316 Such transformations are quite trivial. The reason is the statements are
 317 executed only once. The real sport begins when we have to deal with loops
 318 as we will see momentarily. However, polyhedral compilation framework users
 319 have to be conscious that we @emph{need} to transform programs to achieve
 320 the best performance and that the best transformation  that have to be
 321 discovered (with often many, many efforts) and performed may be
 322 quite complex. Hence the need of powerful model and tools.
 323
 324
 325 @node Thinking in Polyhedra
 326 @section Thinking in Polyhedra
 327
 328
 329 Since the very first compilers, the internal representation of programs
 330 is the @emph{Abstract Syntax Tree}, or AST. In such representation,
 331 each statement appears only once even if it is executed many times (e.g.,
 332 when it is enclosed inside a loop).
 333
 334 @noindent @strong{This is a limitation for program analysis.}
 335 For instance if a statement @emph{depends} on another statement (i.e., they
 336 access the same memory location and at least one of these accesses is a
 337 write), we will consider both statements as unique entities while the
 338 dependence relation may involve only few statement executions.
 339
 340 @noindent @strong{This is a limitation for program transformations.}
 341 Loop transformations operate on statement executions. For instance,
 342 because they consider all statement executions at the same time, present day
 343 production compilers are not able to achieve loop fusion (that tries to
 344 merge the loop bodies of two loops) if the loop bounds
 345 of the two loops do not match (hahaha).
 346
 347 @noindent @strong{This is a limitation for program manipulation flexibility.}
 348 Trees are very rigid data structures that are not easy to manipulate.
 349 Program transformation may require very complex transformations that will
 350 imply deep modifications of the control flow. Hence, for complex program
 351 restructuration, the need for a more precise, more flexible representation.
 352
 353 The Polyhedral Model is a convenient alternative representation which
 354 combines analysis power, expressiveness and high flexibility. The drawback
 355 is it breaks the classical structure of programs that every programmer
 356 is familiar with. It requires some (real) efforts
 357 to be smoothly manipulated, but it definitely worth it. It is based on three
 358 main concepts, @emph{iteration domain},  @emph{scattering function} and
 359 @emph{access function} that are described in depth in the
 360 following sections.
 361
 362 A program part that can be represented using the Polyhedral Model is called
 363 a @strong{Static Control Part} or @strong{SCoP} for short.
 364
 365 @menu
 366 * Iteration Domain::
 367 * Scattering Function::
 368 * Access Function::
 369 @end menu
 370
 371 @node Iteration Domain
 372 @subsection Iteration Domain
 373
 374 The key aspect of the polyhedral model is to consider @emph{statement
 375 instances}. A statement instance is @emph{one} execution of a statement.
 376 A statement
 377 outside a loop has only one instance while those inside loops may have many.
 378 Let us consider the following code with two statements @code{S1}
 379 and @code{S2}:
 380
 381 @example
 382 @group
 383 pi = 3.14;               // S1
 384 for (i = 0; i < 5; i++)
 385   A[i] = pi;             // S2
 386 @end group
 387 @end example
 388
 389 The list of statement instances is the following (we just have to fully
 390 unroll the loop):
 391
 392 @example
 393 @group
 394 pi = 3.14;
 395 A[0] = pi;
 396 A[1] = pi;
 397 A[2] = pi;
 398 A[3] = pi;
 399 A[4] = pi;
 400 @end group
 401 @end example
 402
 403 Each instance of a statement which is enclosed inside a loop may be referred
 404 thanks to its outer loop counters (or @emph{iterators}). In the Polyhedral
 405 Model we consider statements as functions of the outer loop counters that may
 406 produce statement instances:
 407 instead of simply "@code{S2}", we use preferably the notation @code{S2(i)}.
 408 For instance we  denote the statement instance @code{A[3] = pi;} of the
 409 previous example as @code{S2(3)}. This means @emph{instance of
 410 statement @code{S2} for} @code{i = 3}.
 411 If a statement @code{S3} is enclosed inside two loops of iterators @code{i}
 412 (outermost loop) and @code{j} (innermost loop), we would denote it
 413 @code{S3(i,j)}, and so on with more enclosing loops.
 414
 415 The ordered list
 416 of iterators (ordered from the outermost iterator to the innermost iterator)
 417 is called the @strong{iteration vector}. For instance the iteration vector for
 418 @code{S3} is @code{(i,j)}, for @code{S2} it is @code{(i)}, and for @code{S1}
 419 it is empty since it has no enclosing loop: @code{()}. A more precise reading
 420 at the notation @code{S2(3)} would show that it denotes the instance of
 421 statement @code{S2} for the iteration vector @code{(2)}.
 422
 423 Obviously, dealing with statement instances does not mean we have to unroll all
 424 loops. First because there would be probably too many instances to deal with,
 425 and second because we probably just don't know how many instances there are.
 426 For instance in the following loop it is not possible to know (at compile time)
 427 how many times the statement @code{S3} will be executed:
 428
 429 @example
 430 @group
 431 for (i = 2; i <= N; i++)
 432   for (j = 2; j <= N; j++)
 433     A[i] = pi;             // S3
 434 @end group
 435 @end example
 436
 437 @noindent Such a loop is said to be @emph{parametric}: it depends on
 438 (at least) a value called a @emph{parameter} which is not modified
 439 during the execution of the whole loop, but is unknown at compile time.
 440 Here the only parameter is @code{N}.
 441
 442 A compact way to represent all the instances of a given statement
 443 is to consider the set of all possible values of its iteration vector.
 444 This set is called the @strong{iteration domain}. It can be conveniently
 445 described thanks to all the constraints on the various iterators the statement
 446 depends on. For instance, let us consider
 447 the statement @code{S3} of the previous program. The iteration domain is the set
 448 of iteration vectors @code{(i,j)}. Because of the parameter, we are not able to
 449 achieve a precise list of all possible values, it would look like this:
 450
 451 @example
 452 @group
 453 (2,2)  (2,3)  (2,4)  ...  (2,N)
 454 (3,2)  (3,3)  (3,4)  ...  (3,N)
 455 ...    ...    ...    ...  ...
 456 (N,2)  (N,3)  (N,4)  ...  (N,N)
 457 @end group
 458 @end example
 459
 460 @noindent A better way is to say it is the set
 461 of iteration vectors @code{(i,j)} such that @code{i} is an integer greater or
 462 equal than 2 and lower or equal than @code{N}, and @code{j} is an integer
 463 greater or equal than 2 and lower or equal than @code{N}. This may be written
 464 in the following mathematical form:
 465
 466 @tex
 467 $$D _{S3} =  \{(i,j) \in Z^2 \; | \; 2 \leq i \leq N \land 2 \leq j \leq N \}$$
 468 @end tex
 469
 470 @ifnottex
 471 @example
 472 @group
 473 D_S3 =  @{(i,j) in Z^2 | 2 <= i <= N && 2 <= j <= N @}
 474 @end group
 475 @end example
 476 @end ifnottex
 477
 478 @noindent It is easy to see that this iteration domain is a part of the
 479 2-dimensional space
 480 @tex
 481 $Z^2$.
 482 @end tex
 483 @ifnottex
 484 @example
 485 @group
 486 Z^2.
 487 @end group
 488 @end example
 489 @end ifnottex
 490 We often use in our research papers a graphical representation that gives a
 491 better view of this subspace:
 492
 493 @image{images/basic1,4cm}
 494
 495 @noindent Here the iteration domain is specified thanks to a set of
 496 constraints. When those constraints are affine and
 497 depend only on the outer loop counters and some parameters, the set of
 498 constraints defines a @emph{polyhedron} (more precisely this is a
 499 @emph{Z-polyhedron}, but we use @emph{polyhedron} for short).
 500 Hence the Polyhedral Model.
 501
 502 To facilitate the manipulation of the affine constraints, we use a matrix
 503 representation. To write it, we use the @emph{homogeneous} iteration vector:
 504 it is simply the iteration vector with some additional dimensions to
 505 represent the parameters and the constant.
 506 For instance for the statement @code{S3}, the
 507 iteration vector in homogeneous coordinates is @code{(i,j,N,1)}
 508 (we will now call it @emph{iteration vector} directly for short).
 509 Then we write all the constraints as affine inequalities of the form
 510 @tex
 511 $p(i) \geq 0$.
 512 @end tex
 513 @ifnottex
 514 @code{p(i) >= 0}.
 515 @end ifnottex
 516 For instance for the statement @code{S3} the set of constraints is:
 517
 518 @tex
 519 $$
 520 \hbox{$ \cases{  i         - 2  &$\geq 0$\cr
 521                 -i     + N      &$\geq 0$\cr
 522                      j     - 2  &$\geq 0$\cr
 523                     -j + N      &$\geq 0$}$}
 524 $$
 525 @end tex
 526
 527 @ifnottex
 528 @example
 529 @group
 530     i - 2 >= 0
 531    -i + N >= 0
 532     j - 2 >= 0
 533    -j + N >= 0
 534 @end group
 535 @end example
 536 @end ifnottex
 537
 538 @noindent Lastly, we translate the constraint system to the form
 539 @strong{domain matrix}@code{ * }@emph{iteration vector}@code{ >= 0}
 540 (please someone show me how to do this in TeX -not LaTeX- for the
 541 texinfo manual !):
 542 @example
 543 @group
 544 [  1  0  0 -2 ]   [ i ]    [ 0 ]
 545 [ -1  0  1  0 ]   [ j ]    [ 0 ]
 546 [  0  1  0 -2 ] * [ N ] >= [ 0 ]
 547 [  0 -1  1  0 ]   [ 1 ]    [ 0 ]
 548 @end group
 549 @end example
 550
 551 @noindent The domain matrix (along with the iteration vector which
 552 is most of the time an implicit information) will
 553 be used in all our tools to provide the informations on the iteration
 554 domain of a given statement.
 555
 556 @node Scattering Function
 557 @subsection Scattering Function
 558
 559 There is no ordering information inside the iteration domain: it only describes
 560 the set of statement instances but @strong{not} the order in which they have
 561 to be executed relatively to each other. In the past the lexicographic order
 562 of the iteration domain was considered, this is no more true
 563 (especially when using CLooG). If we don't give any ordering information, this
 564 means that the statement instances may be executed in any order
 565 (this is useful, e.g., to specify parallelism), but some statement instances
 566 may depend on some others and it may be critical to enforce a given order (or
 567 non-order). Hence we need another information.
 568
 569 We call @emph{scattering} any kind of ordering information in the Polyhedral
 570 Model. There exists many kind of ordering indeed, as @emph{allocation},
 571 @emph{scheduling}, @emph{chunking} etc. Nevertheless they are all expressed
 572 in the same way, using @emph{logical stamps} that can have various semantics.
 573
 574 In the case of @strong{scheduling}, the logical stamps are logical dates that
 575 express at which date a statement instance have to be executed. For instance,
 576 let us consider the following three statements:
 577
 578 @example
 579 @group
 580 x = a + b;   // S1
 581 y = c + d;   // S2
 582 z = a * b;   // S3
 583 @end group
 584 @end example
 585
 586 @noindent The scheduling of a statement @code{S} is typically
 587 denoted by
 588 @tex
 589 $\theta_{S}$.
 590 @end tex
 591 @ifnottex
 592 T_S.
 593 @end ifnottex
 594 Let us consider the following logical dates for each statement:
 595
 596 @tex
 597 @example
 598 @group
 599 $\theta_{S1} = 2$
 600 $\theta_{S2} = 3$
 601 $\theta_{S3} = 1$
 602 @end group
 603 @end example
 604 @end tex
 605
 606 @ifnottex
 607 @example
 608 @group
 609 T_S1 = 1
 610 T_S2 = 2
 611 T_S3 = 3
 612 @end group
 613 @end example
 614 @end ifnottex
 615
 616 @noindent It means that statement @code{S3} have to be executed at logical date
 617 @code{1}, statement @code{S1} have to be executed at logical date
 618 @code{2} and statement @code{S2} have to be executed at logical date
 619 @code{3}. The target code have to respect this scheduling (the order of
 620 the logical dates), hence it would look like the following where the
 621 variable @code{t} denotes the time:
 622
 623 @example
 624 @group
 625 t = 1;
 626 z = a * b;   // S3
 627 t = 2;
 628 x = a + b;   // S1
 629 t = 3;
 630 y = c + d;   // S2
 631 @end group
 632 @end example
 633
 634 @noindent When some statements share the same logical date, this means that,
 635 once the program reaches this logical date, the two statements can be executed
 636 in any order, or better, in parallel. For instance let us consider the
 637 following scheduling:
 638
 639 @tex
 640 @example
 641 @group
 642 $\theta_{S1} = 1$
 643 $\theta_{S2} = 2$
 644 $\theta_{S3} = 1$
 645 @end group
 646 @end example
 647 @end tex
 648
 649 @ifnottex
 650 @example
 651 @group
 652 T_S1 = 1
 653 T_S2 = 2
 654 T_S3 = 1
 655 @end group
 656 @end example
 657 @end ifnottex
 658
 659 @noindent Statements @code{S1} and @code{S3} have the same logical date,
 660 hence the target code would be:
 661
 662 @example
 663 @group
 664 t = 1;
 665 #pragma omp parallel sections
 666 @{
 667    #pragma omp section
 668    @{
 669      x = a + b;   // S1
 670    @}
 671    #pragma omp section
 672    @{
 673      z = a * b;   // S3
 674    @}
 675 @}
 676 t = 2;
 677 y = c + d;        // S2
 678 @end group
 679 @end example
 680
 681 Logical dates may be multidimensional, as clocks: the first dimension
 682 corresponds to days (most significant), next one is hours (less
 683 significant), the third to minutes and so on. For instance we can consider
 684 the following multidimensional schedules for our example:
 685
 686 @tex
 687 @example
 688 @group
 689 $\theta_{S1} = (1,1)$
 690 $\theta_{S2} = (2,1)$
 691 $\theta_{S3} = (1,2)$
 692 @end group
 693 @end example
 694 @end tex
 695
 696 @ifnottex
 697 @example
 698 @group
 699 T_S1 = (1,1)
 700 T_S2 = (2,1)
 701 T_S3 = (1,2)
 702 @end group
 703 @end example
 704 @end ifnottex
 705
 706 @noindent It is not very hard to decypher the meaning of such scheduling.
 707 Because of the first dimension, statements @code{S1} and @code{S3} will be
 708 executed before statement @code{S2} (@code{S1} and @code{S3} are executed at
 709 day 1, while @code{S2} is executed at day 2). The second dimension is not
 710 really useful there for @code{S2} because it is the only statement executed
 711 at day 2. Nevertheless it allows to order @code{S1} and
 712 @code{S3} relatively to each other since @code{S1} is executed at hour 1 of day
 713 1 while @code{S3} is executed at hour 2 of day 1.
 714 The corresponding target code is the following, with some
 715 additional time variables for a better view of the ordering (@code{t1}
 716 corresponds to the first time dimension, @code{t2} to the second one):
 717
 718 @example
 719 @group
 720 t1 = 1;
 721 t2 = 1;
 722 x = a + b;   // S1
 723 t2 = 2;
 724 z = a * b;   // S3
 725 t1 = 2;
 726 t2 = 1;
 727 y = c + d;   // S2
 728 @end group
 729 @end example
 730
 731 In the case of @strong{allocation} (in the litterature we can find some
 732 papers that call it @emph{placement}), the logical stamps are a processor
 733 number that expresses on which processor a statement instance has to be
 734 executed. Typically, allocations are written in the same way as scheduling
 735 (hence the general term of @emph{scattering}), here we denote it @math{P_S} for a
 736 statement @code{S}. For instance, let us consider the following
 737 allocation:
 738
 739 @tex
 740 @example
 741 @group
 742 $P_{S1} = 1$
 743 $P_{S2} = 2$
 744 $P_{S3} = 1$
 745 @end group
 746 @end example
 747 @end tex
 748
 749 @ifnottex
 750 @example
 751 @group
 752 P_S1 = 1
 753 P_S2 = 2
 754 P_S3 = 1
 755 @end group
 756 @end example
 757 @end ifnottex
 758
 759 @noindent The corresponding target code have to take into account that both
 760 statements @code{S1} and @code{S3} have to be executed on the same processor
 761 (they have the same logical number 1) and that statement @code{S2} have
 762 to be executed on another processor (logical number 2). A possible target code
 763 is the following:
 764
 765 @example
 766 @group
 767 #pragma omp parallel sections
 768 @{
 769    #pragma omp section
 770    @{
 771      // Logical processor 1
 772      x = a + b;   // S1
 773      z = a * b;   // S3
 774    @}
 775    #pragma omp section
 776    @{
 777      // Logical processor 2
 778      y = c + d;   // S2
 779    @}
 780 @}
 781 @end group
 782 @end example
 783
 784 @noindent We can note that no order have been specified for the
 785 statements @code{S1} and @code{S3} that are executed on the same processor.
 786 Hence any order is satisfying. For sake of flexibility, it is usual to build
 787 a scattering whose various dimensions do not have the same semantics. A
 788 typical construction is @emph{space/time mapping} where the first @code{n}
 789 dimensions are devoted to allocation, then the last @code{m}
 790 dimensions are devoted to scheduling. Typically, space/time mapping is
 791 written in the same way as scheduling (hence again the general term of
 792 @emph{scattering}), here we denote it for a statement @code{S} as @math{M_S}.
 793 For instance, let us consider the following space/time mapping for our
 794 example where one dimension is devoted for mapping and one dimension is
 795 devoted to scheduling:
 796
 797 @tex
 798 @example
 799 @group
 800 $M_{S1} = (1,2)$
 801 $M_{S2} = (2,1)$
 802 $M_{S3} = (1,1)$
 803 @end group
 804 @end example
 805 @end tex
 806
 807 @ifnottex
 808 @example
 809 @group
 810 M_S1 = (1,2)
 811 M_S2 = (2,1)
 812 M_S3 = (1,1)
 813 @end group
 814 @end example
 815 @end ifnottex
 816
 817 @noindent Here we have the same first dimension as the previous example, thus
 818 the allocation of the statements to processors is the same. The second
 819 dimension precises on a given processor at which logical date a statement
 820 instance has to be executed. Here, the statement @code{S1} is executed at
 821 day 2 on processor 1 while the statement @code{S3} is executed at day 1 onto
 822 the same processor. It follows this space/time mapping corresponds to the
 823 following target code (we added an additional variable to represent the
 824 local logical clocks):
 825
 826 @example
 827 @group
 828 #pragma omp parallel sections
 829 @{
 830    #pragma omp section
 831    @{
 832      // Logical processor 1
 833      t = 1;
 834      z = a * b;   // S3
 835      t = 2;
 836      x = a + b;   // S1
 837    @}
 838    #pragma omp section
 839    @{
 840      // Logical processor 2
 841      t = 1;
 842      y = c + d;   // S2
 843    @}
 844 @}
 845 @end group
 846 @end example
 847
 848 For the same reason as discussed for iteration domains
 849 (@pxref{Iteration Domain}), it is not possible to define a scattering for
 850 each statement instance, especially if the statement belongs to a
 851 (possibly parametric) loop. The iteration vector fully defines an
 852 instance of a given statement. Thus, a practical way to provide a scattering
 853 for each instance of a given statement is to use a @emph{function}
 854 that depends on the iteration vector. In this way the function may give
 855 for each iteration vector a different scattering. We call these functions
 856 @strong{scattering functions}. Scattering functions are @emph{affine}
 857 functions of the outer loop counter and the global parameters.
 858 For instance, let us consider the following source code:
 859
 860 @example
 861 @group
 862 for (i = 2; i <= 4; i++)
 863   for (j = 2; j <= 4; j++)
 864     P[i+j] += A[i] + B[j]; // S4
 865 @end group
 866 @end example
 867
 868 @noindent The iteration domain of the statement @code{S4} is:
 869
 870
 871 @tex
 872 $$D _{S4} =  \{(i,j) \in Z^2 \; | \; 2 \leq i \leq 4 \land 2 \leq j \leq 4 \}.$$
 873 @end tex
 874 @ifnottex
 875 @example
 876 @group
 877 D_S4=  @{(i,j) in Z^2 | 2 <= i <= 4 && 2 <= j <= 4 @}.
 878 @end group
 879 @end example
 880 @end ifnottex
 881
 882 @noindent If you are still not comfortable with the mathematical notation, it
 883 corresponds to the following graphical representation:
 884
 885 @image{images/basic2,3cm}
 886
 887 @noindent The list of the statement instances of @code{S4} (the integral
 888 points of its iteration domain) corresponds to the following iteration vectors:
 889
 890 @example
 891 @group
 892 iteration vector
 893      (2,2)
 894      (2,3)
 895      (2,4)
 896      (3,2)
 897      (3,3)
 898      (3,4)
 899      (4,2)
 900      (4,3)
 901      (4,4)
 902 @end group
 903 @end example
 904
 905 @noindent Let us suppose we want to schedule the instances of the statement
 906 @code{S4} (the integral points of its iteration domain) using the following
 907 scheduling function:
 908
 909 @tex
 910 @example
 911 @group
 912 $\theta_{S4}(i,j) = (j+2,3*i+j)$
 913 @end group
 914 @end example
 915 @end tex
 916
 917 @ifnottex
 918 @example
 919 @group
 920 T_S4(i,j) = (j+2,3*i+j)
 921 @end group
 922 @end example
 923 @end ifnottex
 924
 925 @noindent We only have to apply the function to each iteration vector to find
 926 the logical date of each instance:
 927
 928 @example
 929 @group
 930 iteration vector       logical date
 931      (2,2)       -->       (4,8)
 932      (2,3)       -->       (5,9)
 933      (2,4)       -->       (6,10)
 934      (3,2)       -->       (4,11)
 935      (3,3)       -->       (5,12)
 936      (3,4)       -->       (6,13)
 937      (4,2)       -->       (4,14)
 938      (4,3)       -->       (5,15)
 939      (4,4)       -->       (6,16)
 940 @end group
 941 @end example
 942
 943 Polyhedral Model users do not have to take care about the generation of a
 944 target code that respects the scattering: the CLooG tool is there to
 945 solve the problem quite easily (@code{http://www.cloog.org}). For the previous
 946 example, the target code would be the following (@code{t1} and @code{t2}
 947 corresponds to the two dimensions of the logical date):
 948
 949 @example
 950 @group
 951 for (t1 = 4; t1 <= 6; t1++) @{
 952   for (t2 = t1+4; t2 <= t1+10; t2++) @{
 953     if ((-t1+t2+2)%3 == 0) @{
 954       i = (-t1+t2+2)/3 ;
 955       j = t1-2 ;
 956       P[i+j] += A[i] + B[j]; // S4
 957     @}
 958   @}
 959 @}
 960 @end group
 961 @end example
 962
 963
 964
 965 Obviously with such a twisted scheduling, it is hard to see the "meaning"
 966 of the transformation. To name any kind of program transformation as
 967 a magic spell ("tile", "fuse", "skew"...) is an old bad habit that should
 968 be changed in the Polyhedral Model: a scheduling may be an arbitrary complex
 969 sequence of basic-old-good transformations. Nevertheless it is most of the
 970 time quite easy to translate well known transformations to schedules. For
 971 instance, let us consider this new scheduling function:
 972
 973 @tex
 974 @example
 975 @group
 976 $\theta_{S4}(i,j) = (j,i)$
 977 @end group
 978 @end example
 979 @end tex
 980
 981 @ifnottex
 982 @example
 983 @group
 984 T_S4(i,j) = (j,i)
 985 @end group
 986 @end example
 987 @end ifnottex
 988
 989 @noindent Using CLooG, we can generate the target code:
 990
 991 @example
 992 @group
 993 for (t1 = 2; t1 <= 4; t1++) @{
 994   for (t2 = 2; t2 <= 4; t2++) @{
 995     i = t2;
 996     j = t1;
 997     P[i+j] += A[i] + B[j]; // S4
 998   @}
 999 @}
1000 @end group
1001 @end example
1002
1003
1004 @noindent It is easy to see (and analyze) that it corresponds to a classical
1005 @emph{loop interchange} transformation.
1006
1007 A very useful example of multi-dimensional scattering functions is
1008 the @strong{scheduling of the original program}.
1009 The method to compute it is quite simple (@pxref{Fea92}). The idea is to
1010 build an abstract syntax tree of the program and to read the scheduling for
1011 each statement. For instance, let us consider the following implementation of
1012 a Cholesky factorization:
1013
1014 @example
1015 @group
1016 /* A Cholesky factorization kernel. */
1017 for (i=1;i<=N;i++) @{
1018   for (j=1;j<=i-1;j++) @{
1019     a[i][i] -= a[i][j] ;           /* S1 */
1020   @}
1021   a[i][i] = sqrt(a[i][i]) ;        /* S2 */
1022   for (j=i+1;j<=N;j++) @{
1023     for (k=1;k<=i-1;k++) @{
1024       a[j][i] -= a[j][k]*a[i][k] ; /* S3 */
1025     @}
1026     a[j][i] /= a[i][i] ;           /* S4 */
1027     @}
1028   @}
1029 @}
1030 @end group
1031 @end example
1032
1033 @noindent The corresponding abstract syntax tree is given in the following
1034 figure. It directly gives the scattering functions (schedules) for all the
1035 statements of the program.
1036
1037 @image{images/tree,6cm}
1038
1039 @tex
1040 $$
1041 \hbox{$ \cases{ \theta _{S1}(i,j)    &$=  (0,i,0,j,0)$\cr
1042                 \theta _{S2}(i)      &$=  (0,i,1)$\cr
1043                 \theta _{S3}(i,j,k)  &$=  (0,i,2,j,0,k,0)$\cr
1044                 \theta _{S4}(i,j)    &$=  (0,i,2,j,1)$}$}
1045 $$
1046 @end tex
1047
1048 @ifnottex
1049 @example
1050 @group
1051 T_S1(i,j)   = (0,i,0,j,0)
1052 T_S2(i)     = (0,i,1)
1053 T_S3(i,j,k) = (0,i,2,j,0,k,0)
1054 T_S4(i,j)   = (0,i,2,j,1)
1055 @end group
1056 @end example
1057 @end ifnottex
1058
1059 @noindent These schedules depend on the iterators and give for each instance
1060 of each statement a unique execution date. Using such scattering functions
1061 allows CLooG to re-generate the input code.
1062
1063 @noindent To easily manipulate the scattering function of any
1064 statement @code{S}, we translate it to the matrix form:
1065 @tex
1066 @math{\theta_S}(@emph{iteration vector})
1067 @end tex
1068 @ifnottex
1069 T_S(@emph{iteration vector})
1070 @end ifnottex
1071 @code{ = }@strong{scattering matrix}@code{ * }@emph{iteration vector}.
1072 For instance let us consider again our previous example
1073 @tex
1074 $\theta_{S4}(i,j) = (j+2,3*i+j).$
1075 @end tex
1076 @ifnottex
1077 T_S4(i,j) = (j+2,3*i+j).
1078 @end ifnottex
1079 We write it in the following way (again please someone show me how to do
1080 this in TeX -not LaTeX- for the texinfo manual !):
1081 @example
1082 @group
1083      [ i ]    [  0  1  2 ]   [ i ]
1084 T_S4([ j ]) = [  3  1  0 ] * [ j ]
1085      [ 1 ]                   [ 1 ]
1086 @end group
1087 @end example
1088
1089 @noindent The scattering matrix (along with the iteration vector which
1090 is most of the time an implicit information) will
1091 be used in all our tools to provide the informations on the scattering
1092 of a given statement.
1093
1094
1095 @node Access Function
1096 @subsection Access Function
1097
1098 Before applying any transformation, it is essential to deeply analyze both the
1099 original program and the transformation to ensure the transformation does not
1100 imply any modification of the original program semantics. In the Polyhedral
1101 Model, we can reach a @strong{total analysis power}: we are able to achieve
1102 an exact analysis when all the memory accesses are made through arrays
1103 (note that variables are a particular case of arrays since they are simply
1104 arrays with only one memory location) with affine subscripts that depend
1105 on outer loop counters and global parameters (note that @emph{subscripts}
1106 are sometimes called @emph{index} or @emph{accesses} in the litterature).
1107
1108 For instance let us consider the array access @code{A[2*i+j][j][i+N]}. It has
1109 three dimensions, each subscript dimension is an affine form of some outer loop
1110 iterarors (@code{i} and @code{j}) and global parameters (@code{N}) hence
1111 it corresponds to an acceptable array access in the Polyhedral Model.
1112
1113 Each array access can target a different memory cell depending on the
1114 statement instance, i.e., depending on the iteration vector.
1115 Thus we use access functions (or subscript functions or index functions as you
1116 prefer) depending on the iteration vector to describe an array access. In our
1117 example, the access function would be written
1118 @math{F_A(i,j) = (2*i+j,j,i+N)}.
1119
1120 @noindent To easily manipulate the access function of any
1121 array @code{A}, we translate it to the matrix form:
1122 @math{F_A}(@emph{iteration vector})
1123 @code{ = }@strong{access matrix}@code{ * }@emph{iteration vector}.
1124 For instance let us consider again our previous example: we write it in the
1125 following way (again please someone show me how to do this in TeX -not LaTeX-
1126 for the texinfo manual !):
1127 @example
1128 @group
1129     [ i ]    [  2  1  0  0 ]   [ i ]
1130 F_A([ j ]) = [  0  1  0  0 ] * [ j ]
1131     [ N ]    [  1  0  1  0 ]   [ N ]
1132     [ 1 ]                      [ 1 ]
1133 @end group
1134 @end example
1135
1136 @noindent The access matrix (along with the iteration vector which
1137 is most of the time an implicit information) will
1138 be used in all our tools to provide the informations on the access
1139 of a given statement.
1140
1141
1142
1143 @c %  *********************** OpenScop Specification **************************
1144 @node OpenScop File Format Specification
1145 @chapter OpenScop File Format Specification
1146
1147
1148 @menu
1149 * A First Example::
1150 * Writing The Input File::
1151 * Reading The Output File::
1152 * Calling Clan::
1153 * Clan Options::
1154 @end menu
1155
1156 @c %/*************************************************************************
1157 @c % *                              A FIRST EXAMPLE                          *
1158 @c % *************************************************************************/
1159 @node A First Example
1160 @section A First Example
1161 Clan takes as input a source code file than can be written in either C or
1162 C++ or C# or Java (or any other imperative language that is close enough
1163 to C). It is very simple as it only translates a part of a program that can
1164 be represented using the Polyhedral model (@pxref{Polyhedral Representation}) in
1165 a matrix form. Clan does not find itself the program parts that could be
1166 represented using the Polyhedral Model. More complex tools like WRAP-IT for the
1167 ORC compiler (@code{http://www.lri.fr/~girbal/site_wrapit})or the GRAPHITE
1168 branch of GCC (@code{http://gcc.gnu.org/wiki/Graphite}) are devoted to
1169 such a complex, highly technical problem. Using Clan, the user has to specify
1170 thanks to pragmas where begins the SCoP he is interested by, and where it ends.
1171
1172 For instance, let us consider the following source code in C of a matrix-matrix
1173 multiply program that reads two matrices, achieves the multiply then prints
1174 the result. Let us also consider that the user is only interested in the
1175 matrix-matrix multiply kernel:
1176
1177 @example
1178 /* matmul.c 128*128 matrix multiply */
1179 #include <stdio.h>
1180 #define N 128
1181
1182 int main() @{
1183   int   i,j,k;
1184   float a[N][N], b[N][N], c[N][N];
1185
1186   /* We read matrix a then matrix b */
1187   for (i = 0; i < N; i++)
1188     for (j = 0; j < N; j++)
1189       scanf(" %f",&a[i][j]);
1190   for (i = 0; i < N; i++)
1191     for (j = 0; j < N; j++)
1192       scanf(" %f",&b[i][j]);
1193
1194   /* c = a * b */
1195 #pragma scop
1196   for (i = 0; i < N; i++)
1197     for (j = 0; j < N; j++) @{
1198       c[i][j] = 0.0;
1199       for (k = 0; k < N; k++)
1200         c[i][j] = c[i][j] + a[i][k]*b[k][j];
1201     @}
1202 #pragma endscop
1203
1204   /* We print matrix c */
1205   for (i = 0; i < N; i++) @{
1206     for (j = 0; j < N; j++)
1207       printf("%6.2f ",c[i][j]);
1208     printf("\n");
1209   @}
1210
1211   return 0;
1212 @}
1213 @end example
1214
1215 The tags to ask Clan to consider a given part of the code are provided thanks
1216 to the pragmas @code{#pragma scop} and  @code{#pragma endscop}. It can have
1217 different forms depending on the input language. This is explained in
1218 a further section (@pxref{Writing The Input File}).
1219
1220 This source code file may be called @samp{matmul.c}
1221 (this example is provided in the Clan distribution as
1222 @code{test/matmul.c}) and we can ask Clan to process it
1223 and to generate the polyhedral representation by a simple call to Clan
1224 with this file as input: @samp{clan matmul.c}. By default, Clan will print
1225 the polyhedral representation in the standard output:
1226
1227 @example
1228 # [File generated by Clan 1.0.0 64 bits]
1229
1230 SCoP
1231
1232 # =============================================== Global
1233 # Language
1234 C
1235
1236 # Context
1237 0 3
1238
1239 # Parameter names are provided
1240 1
1241 # Parameter names
1242 N
1243
1244 # Number of statements
1245 2
1246
1247 # =============================================== Statement 1
1248 # ----------------------------------------------  1.1 Domain
1249 # Iteration domain
1250 1
1251 4 5
1252    1    1    0    0    0    ## i >= 0
1253    1   -1    0    1   -1    ## -i+N-1 >= 0
1254    1    0    1    0    0    ## j >= 0
1255    1    0   -1    1   -1    ## -j+N-1 >= 0
1256
1257 # ----------------------------------------------  1.2 Scattering
1258 # Scattering function is provided
1259 1
1260 # Scattering function
1261 5 5
1262    0    0    0    0    0    ## 0
1263    0    1    0    0    0    ## i
1264    0    0    0    0    0    ## 0
1265    0    0    1    0    0    ## j
1266    0    0    0    0    0    ## 0
1267
1268 # ----------------------------------------------  1.3 Access
1269 # Access informations are provided
1270 1
1271 # Read access informations
1272 0 5
1273 # Write access informations
1274 2 5
1275    1    1    0    0    0    ## c[i][j]
1276    0    0    1    0    0    ##
1277
1278 # ----------------------------------------------  1.4 Body
1279 # Statement body is provided
1280 1
1281 # Original iterator names
1282 i j
1283 # Statement body
1284 c[i][j]=0.0;
1285
1286
1287 # =============================================== Statement 2
1288 # ----------------------------------------------  2.1 Domain
1289 # Iteration domain
1290 1
1291 6 6
1292    1    1    0    0    0    0    ## i >= 0
1293    1   -1    0    0    1   -1    ## -i+N-1 >= 0
1294    1    0    1    0    0    0    ## j >= 0
1295    1    0   -1    0    1   -1    ## -j+N-1 >= 0
1296    1    0    0    1    0    0    ## k >= 0
1297    1    0    0   -1    1   -1    ## -k+N-1 >= 0
1298
1299 # ----------------------------------------------  2.2 Scattering
1300 # Scattering function is provided
1301 1
1302 # Scattering function
1303 7 6
1304    0    0    0    0    0    0    ## 0
1305    0    1    0    0    0    0    ## i
1306    0    0    0    0    0    0    ## 0
1307    0    0    1    0    0    0    ## j
1308    0    0    0    0    0    1    ## 1
1309    0    0    0    1    0    0    ## k
1310    0    0    0    0    0    0    ## 0
1311
1312 # ----------------------------------------------  2.3 Access
1313 # Access informations are provided
1314 1
1315 # Read access informations
1316 6 6
1317    1    1    0    0    0    0    ## c[i][j]
1318    0    0    1    0    0    0    ##
1319    2    1    0    0    0    0    ## a[i][k]
1320    0    0    0    1    0    0    ##
1321    3    0    0    1    0    0    ## b[k][j]
1322    0    0    1    0    0    0    ##
1323 # Write access informations
1324 2 6
1325    1    1    0    0    0    0    ## c[i][j]
1326    0    0    1    0    0    0    ##
1327
1328 # ----------------------------------------------  2.4 Body
1329 # Statement body is provided
1330 1
1331 # Original iterator names
1332 i j k
1333 # Statement body
1334 c[i][j]=c[i][j]+a[i][k]*b[k][j];
1335
1336
1337 # =============================================== Options
1338 @end example
1339
1340 We will not describe here precisely the structure and the components of this
1341 output, this is described in depth in a further section
1342 (@pxref{Reading The Output File}). This file format, called @code{.scop} has
1343 been designed to be the input file format of most of the polyhedral tools.
1344 If you read the description of the polyhedral representation of programs,
1345 you should already feel familiar with this file format
1346 (@pxref{Polyhedral Representation}).
1347
1348
1349 @c %/*************************************************************************
1350 @c % *                                Input file                             *
1351 @c % *************************************************************************/
1352 @node Writing The Input File
1353 @section Writing The Input File
1354
1355 The input file of Clan is a source code file written in any language based on
1356 C for the @code{for} loop, the @code{if} and for the array accesses.
1357 C, C++, Java and C# are good examples that should work pretty well with Clan.
1358
1359 The input file may contain a static control part (i.e., a part of the
1360 program that can be represented using the Polyhedral Model as described in
1361 the corresponding chapter, @pxref{Polyhedral Representation}) delimitated
1362 @strong{by the user} thanks to pragmas. Clan trusts the user: it will not check
1363 hardly whether the program part is actually a SCoP or not. It will only try to
1364 translate the program part to a polyhedral representation and will fail with
1365 @emph{syntax error} if it reads something wrong.
1366
1367 In C, C++ and C#, the pragma to tag the beginning of the SCoP is:
1368 @example
1369 @group
1370 #pragma scop
1371 @end group
1372 @end example
1373 @noindent and the pragma to tag the end of the SCoP is:
1374 @example
1375 @group
1376 #pragma endscop
1377 @end group
1378 @end example
1379
1380 In Java, the pragma to tag the beginning of the SCoP is:
1381 @example
1382 @group
1383 /*@@ scop */
1384 @end group
1385 @end example
1386 @noindent and the pragma to tag the end of the SCoP is:
1387 @example
1388 @group
1389 /*@@ end scop */
1390 @end group
1391 @end example
1392
1393
1394
1395 @c %/*************************************************************************
1396 @c % *                               Output file                             *
1397 @c % *************************************************************************/
1398 @node Reading The Output File
1399 @section Reading The Output File
1400
1401 The output text file of Clan provides an explicit polyhedral representation of
1402 a static control part. The output file format is called @emph{.scop} format.
1403 It has been designed by various researchers in polyhedral compilation from
1404 various institutions. It builds on previous popular polyhedral file formats
1405 like @emph{.cloog} to provide a unique, extensible file format to every
1406 polyhedral compilation tools (including future versions of CLooG). This file
1407 is composed of two main parts. The first part is devoted to the polyhedral
1408 representation of a SCoP. It contains what is strictly necessary to enter a
1409 complete source-to-source framework in the polyhedral model and to output a
1410 semantically equivalent code for the SCoP, from analysis to code generation.
1411 The second part of the file contains options, i.e. extensions to provide
1412 additional informations to some tools.
1413
1414 The following grammar describes the structure of the first part of the
1415 .scop file format where terminals are preceeded by "_". Each
1416 relevant part will be explained in more details momentarily. Its looks
1417 long but it has been artificially extended to be easily understood and
1418 it can be easily simplified:
1419
1420 @example
1421 File                ::= SCoP
1422 SCoP                ::= "SCoP"   Context Statements
1423 Context             ::= Language Domain  Parameters
1424 Statements          ::= Nb_statements Statement_list
1425 String_list         ::= _String   String_list    | (void)
1426 Statement_list      ::= Statement Statement_list | (void)
1427 Domain_list         ::= Domain    Domain_list    | (void)
1428 Statement           ::= Iteration_domain Scattering Access Body
1429 Iteration_domain    ::= Domain_union
1430 Domain_union        ::= Nb_domains Domain_list
1431 Scattering          ::= "0" | "1" Scattering_function
1432 Access              ::= "0" | "1" Read_function Write_function
1433 Parameters          ::= "0" | "1" Parameter_list
1434 Body                ::= "0" | "1" Iterator_list Body_text
1435 Language            ::= "C" | "C++" | "C#" | "Java" | "Toy"
1436 Parameter_list      ::= String_list
1437 Iterator_list       ::= String_list
1438 Domain              ::= _Matrix
1439 Scattering_function ::= _Matrix
1440 Read_function       ::= _Matrix
1441 Write_function      ::= _Matrix
1442 Nb_statements       ::= _Integer
1443 Nb_domains          ::= _Integer
1444 Body_text           ::= _String
1445 @end example
1446
1447
1448 @itemize @bullet
1449 @item  @samp{Context} represents the informations that are
1450        shared by all the statements. It consists on
1451        the language used (which can be @samp{C}, @samp{C++}, @samp{C#}
1452        or @samp{Java}), the global constraints on parameters and
1453        optionally the parameter names. The set of constraints on parameters is
1454        essential since it provides the number of parameters. The @samp{Domain}
1455        encoding includes the number of unknown (here the number of parameters)
1456        (@pxref{Domain Representation}). Even if there are no constraints, this
1457        number has to be correct.
1458        After the constraints, it is possible to provide the list of parameter
1459        names (the textual names in the original program). A @samp{0} means
1460        we don't provide the list of parameter names, and a @samp{1} means
1461        the list of parameter names is provided afterward. The original
1462        parameter names
1463        are necessary for the code generator to be able to generate a code
1464        that can replace directly the SCoP in the original program by
1465        copy/paste. In the case of a @samp{0}, parameter names will probably
1466        be generated by the code generator (this is the case when using CLooG)
1467        or will be extracted from another input source.
1468 @item  @samp{Statements} represents the informations on the statements.
1469        @samp{Nb_statements} is the number of statements in the program,
1470        i.e. the number of @samp{Statement} items in the @samp{Statement_list}.
1471        @samp{Statement} represents the informations on a given statement.
1472        To each statement is associated four informations, the first one is
1473        mandatory while the three others are optional. The statement iteration
1474        domain @samp{Iteration_domain} is the required information, then one can
1475        provide optionally a scattering function, the access functions and
1476        the statement body, in this order. Each optional information is
1477        preceeded by a boolean that precises whether the optional information is
1478        provided or not.
1479        The iteration domain (@pxref{Iteration Domain}) is represented using a
1480        matrix (@pxref{Domain Representation}). Next, the scattering function
1481        (@pxref{Scattering Function}) is represented using a matrix as well
1482        (@pxref{Scattering Representation}). The access functions
1483        (@pxref{Access Function}) are represented using two matrices, one for read
1484        accesses and another one for write accesses
1485        (@pxref{Access Representation}). The statement body is made of two
1486        parts: first, the list of surrounding loop counters in the original
1487        program and second, the text string of the statement. This
1488        representation allows to apply the substitution of the original
1489        iterators with new iterators in the target program.
1490 @end itemize
1491
1492 The main terminal parts (domains, scattering and access functions) are
1493 detailed in the next subsections. Lastly, we will describe the option part
1494 (@pxref{Option Part}).
1495
1496 @menu
1497 * Domain Representation::
1498 * Scattering Representation::
1499 * Access Representation::
1500 * Option Part::
1501 @end menu
1502
1503 @node Domain Representation
1504 @subsection Domain Representation
1505 As shown by the grammar, the input file describes the various informations
1506 thanks to strings, integers and domains. Each domain is defined by a set of
1507 constraints in the PolyLib format (@pxref{Wil93}). They have the
1508 following syntax:
1509 @enumerate
1510 @item Some optional comment lines beginning with @samp{#}.
1511 @item The row and column numbers, possibly followed by comments.
1512 @item The constraint rows. Each row corresponds to a constraint the
1513       domain have to satisfy. Each row must be on a single line and is possibly
1514       followed by comments. The constraint is an equality @math{p(x) = 0} if the
1515       first element is 0, an inequality  @math{p(x) \geq 0} if the first element
1516       is 1. The next elements are the unknown coefficients, followed by
1517       the parameter coefficients. The last element is the constant factor.
1518 @end enumerate
1519 For instance, assuming that @samp{i}, @samp{j} and @samp{k} are the loop
1520 iterators and @samp{m} and @samp{n} are the parameters, the domain defined by
1521 the following constraints :
1522
1523 @tex
1524 $$
1525 \hbox{$ \cases{ -i     + m &$\geq 0$\cr
1526                     -j + n &$\geq 0$\cr
1527                  i + j - k &$\geq 0$}$}
1528 $$
1529 @end tex
1530 @ifnottex
1531 @example
1532 @group
1533    -i + m >= 0
1534    -j + n >= 0
1535 i + j - k >= 0
1536 @end group
1537 @end example
1538 @end ifnottex
1539
1540 @noindent can be written in the input file as follows :
1541
1542 @example
1543 @group
1544 # This is a domain
1545 3 7                      # 3 lines and 7 columns
1546 # eq/in i  j  k  m  n  1
1547     1  -1  0  0  1  0  0 #    -i + m >= 0
1548     1   0 -1  0  0  1  0 #    -j + n >= 0
1549     1   1  1 -1  0  0  0 # i + j - k >= 0
1550 @end group
1551 @end example
1552
1553 Each iteration domain @samp{Iteration_domain} of a given statement
1554 is a @emph{union} of polyhedra
1555 @samp{Domain_union}. A union is defined by its number of elements
1556 @samp{Nb_domains} and the elements themselves @samp{Domain_list}.
1557 For instance, let us consider the following pseudo-code:
1558
1559 @example
1560 @group
1561 for (i = 1; i <= n; i++) @{
1562   if ((i >= m) || (i <= 2*m))
1563     S1;
1564 @}
1565 @end group
1566 @end example
1567
1568 @noindent The iteration domain of @samp{S1} can be divided into two
1569 polyhedra and written in the .scop file as follows:
1570
1571 @example
1572 @group
1573 2 # Number of polyhedra in the union
1574 # First domain
1575 3 5                # 3 lines and 5 columns
1576 # eq/in i  m  n  1
1577     1   1  0  0 -1 #  i >= 1
1578     1  -1  0  1  0 #  i <= n
1579     1   1 -1  0  0 #  i >= m
1580 # Second domain
1581 3 5                # 3 lines and 5 columns
1582 # eq/in i  m  n  1
1583     1   1  0  0 -1 #  i >= 1
1584     1  -1  0  1  0 #  i <= n
1585     1  -1  2  0  0 #  i <= 2*m
1586 @end group
1587 @end example
1588
1589 @node Scattering Representation
1590 @subsection Scattering Representation
1591 Scattering functions are depicted in the input file thanks a representation
1592 very close to the domain one. The difference is each row do not describe a
1593 constraint but a scattering function dimension (@pxref{Scattering Function}).
1594 By convention, the first element of each row (the one that defines whether
1595 the constraint is an equality or an inequality for domains)
1596 @emph{must be set to 0}. The next elements are the unknown coefficients,
1597 followed by the parameter coefficients. The last element is the constant
1598 factor. For instance, assuming that @samp{i}, @samp{j} and @samp{k} are the loop
1599 iterators and @samp{m} and @samp{n} are the parameters, the scattering
1600 function
1601 @tex
1602 $\theta_{S}(i,j,k) = (j+2,3*i+j,k+n+1)$
1603 @end tex
1604 @ifnottex
1605 @example
1606 T_@{S@}(i,j,k) = (j+2,3*i+j,k+n+1)
1607 @end example
1608 @end ifnottex
1609 may be written in a .scop file in the following way:
1610
1611 @example
1612 @group
1613 # A scattering function
1614 3 7                      # 3 dimensions and 7 columns
1615 # 0  i  j  k  m  n  1
1616   0  0  1  0  0  0  2    # j+2
1617   0  3  1  0  0  0  0    # 3*i+j
1618   0  0  0  1  0  1  1    # k+n+1
1619 @end group
1620 @end example
1621
1622 Note that this representation is different from the .cloog format: the useless
1623 and error-prone identity matrix part disappeared.
1624
1625 The scattering function extracted by Clan is the scheduling of the original
1626 program as described in a previous section (@pxref{Scattering Function}). It
1627 allows a code generator (like CLooG) to reconstruct directly the original
1628 program or a dependence analyzer (like Candl) to achieve its data dependence
1629 calculation.
1630
1631 @node Access Representation
1632 @subsection Access Representation
1633
1634 Access functions are depicted in the input file thanks a representation
1635 very close to the domain one. The difference is each row do not describe a
1636 constraint but a access function dimension (@pxref{Access Function}).
1637 Moreover, the matrix representation do not describes only one access function
1638 but a set of access functions. Each array accessed in the SCoP has a unique
1639 strictly positive identification number. The first element of each row (the
1640 one that defines whether the constraint is an equality or an inequality for
1641 domains) corresponds to the array identifier iff the row corresponds to the
1642 first dimension of the access function. If the first element is 0, this means
1643 the row corresponds to the next dimension of the access function, with respect
1644 to the previous row. The next elements are the unknown coefficients,
1645 followed by the parameter coefficients. The last element is the constant
1646 factor. For instance, assuming that @samp{i}, @samp{j} and @samp{k} are the loop
1647 iterators and @samp{m} and @samp{n} are the parameters, the set of array
1648 accesses @code{A[2*i+j][j][i+n]}, @code{B[i+j]} and @code{A[k][j][1]} (the
1649 identifier of @code{A} is 1 and the identifier of @code{B} is 2)
1650 may be written in a .scop file in the following way:
1651
1652 @example
1653 @group
1654 # A set of access functions
1655 7 7                      # 7 rows and 7 columns
1656 # id  i  j  k  m  n  1
1657    1  2  1  0  0  0  0   # A[2*i+j][j][i+n]
1658    0  0  1  0  0  0  0   #
1659    0  1  0  0  0  1  0   #
1660    2  1  1  0  0  0  0   # B[i+j]
1661    1  0  0  1  0  0  0   # A[k][j][1]
1662    0  0  1  0  0  0  0   #
1663    0  0  0  0  0  0  1   #
1664 @end group
1665 @end example
1666
1667
1668 @node Option Part
1669 @subsection Option Part
1670
1671 The end of the .scop file is made of a succession of options delimited using
1672 XML-like tags. Each tool will take care of known options and will ignore the
1673 others. There is no specification for the option body as it is
1674 tool-dependent. Nevertheless, authors are invited to put the name of
1675 the tool inside the option name to avoid conflicts. A reserved
1676 option name is @samp{Comments} that allows to put some comments in the second
1677 part of the .scop file. For instance, this could be a possible
1678 second part of a .scop file:
1679
1680 @example
1681 @group
1682 <Comments>
1683     Just a comment example.
1684 <\Comments>
1685
1686 <CLooG foobar>
1687     This is supposed to provide CLooG some interesting
1688     additional informations.
1689 <\CLooG foobar>
1690 @end group
1691 @end example
1692
1693 A second reserved name is @samp{arrays}, Clan can optionally print a
1694 table of the arrays referenced in the access functions. For instance, for a program referencing first the array @samp{FOO}, then the array @samp{BAR}:
1695
1696 @example
1697 @group
1698 <arrays>
1699 # Number of referenced arrays
1700 2
1701 # First reference
1702 1 FOO
1703 # Second reference
1704 2 BAR
1705 </arrays>
1706 @end group
1707 @end example
1708
1709
1710
1711 @c %/*************************************************************************
1712 @c % *                              Calling Clan                             *
1713 @c % *************************************************************************/
1714 @node Calling Clan
1715 @section Calling Clan
1716 Clan is called by the following command:
1717 @example
1718        clan [ options | file ]
1719 @end example
1720 The default behavior of Clan is to read the input source code from a file and
1721 to print the generated .scop file on the standard output.
1722 Clan's behavior and the output file are under the user control thanks
1723 to some options which will be detailed momentarily (@pxref{Clan Options}).
1724 @code{file} is the input file. @code{stdin} is a special value: when used,
1725 input is standard input. For instance, we can call Clan to process the
1726 input file @code{basic.c} with default options by typing:
1727 @code{clan basic.c} or @code{more basic.c | clan stdin}
1728 (usual @code{more basic.c | clan -} works too).
1729
1730
1731 @c %/*************************************************************************
1732 @c % *                              Clan Options                             *
1733 @c % *************************************************************************/
1734 @node Clan Options
1735 @section Clan Options
1736
1737 @menu
1738 * Output::
1739 * Arrays Tag::
1740 * Help::
1741 * Version ::
1742 @end menu
1743
1744 @node Output
1745 @subsection Output @code{-o <output>}
1746
1747      @code{-o <output>}: this option sets the output file. @code{stdout} is a
1748      special value: when used, output is standard output.
1749      Default value is @code{stdout}.
1750
1751 @node Arrays Tag
1752 @subsection Arrays Tag @code{-arraystag}
1753
1754 @code{-arraystag}: this option dumps the table of referenced arrays at
1755 the end of the .scop file, between the @samp{<arrays>} and
1756 @samp{</arrays} tags.
1757
1758 @node Help
1759 @subsection Help @code{--help} or @code{-h}
1760
1761      @code{--help} or @code{-h}: this option asks Clan to print a short help.
1762
1763 @node Version
1764 @subsection Version @code{--version} or @code{-v}
1765
1766      @code{--version} or @code{-v}: this option asks Clan to print some
1767      release and version informations.
1768
1769
1770 @c %/*************************************************************************
1771 @c % *                 OpenScop Data Structure Specification                 *
1772 @c % *************************************************************************/
1773 @node OpenScop Data Structure Specification
1774 @chapter OpenScop Data Structure Specification
1775 The Clan Library was implemented to allow the user to call Clan
1776 directly from his programs, without file accesses or system calls. The
1777 user only needs to link his programs with C libraries. The Clan
1778 library mainly provides one function (@code{clan_scop_extract})
1779 which takes as input the source code file to process with some options,
1780 and returns the data structure corresponding
1781 to the SCoP (a @code{clan_scop_t} structure) which contains the
1782 polyhedral representation of the SCoP.
1783 The user can work with this data structure and/or use
1784 the printing function to output a .scop file.
1785 Some other functions are provided for convenience reasons.
1786 These functions as well as the data structures are described in this section.
1787
1788 @menu
1789 * Clan Data Structures::
1790 * Clan Functions::
1791 * Example of Library Utilization::
1792 @end menu
1793
1794
1795 @node Clan Data Structures
1796 @section Clan Data Structures Description
1797 In this section, we describe the data structures used by the loop
1798 analyzer to represent and to process a SCoP. As this is not a developer's
1799 guide, data structure devoted to internal use as well as internal use
1800 fields are not described here.
1801
1802 @menu
1803 * clan_matrix_t::
1804 * clan_matrix_list_t::
1805 * clan_statement_t::
1806 * clan_scop_t::
1807 * clan_options_t::
1808 @end menu
1809
1810
1811 @node clan_matrix_t
1812 @subsection clan_matrix_t
1813
1814 @noindent The @code{clan_matrix_t} structure is the same as the PolyLib
1815 @code{Matrix} data structure (@pxref{Wil93}) in order to be used
1816 directly by tools using this format with a simple cast.
1817
1818 @example
1819 @group
1820 struct clan_matrix
1821 @{
1822   unsigned NbRows;     /* The number of rows */
1823   unsigned NbColumns;  /* The number of columns */
1824   clan_int_t ** p;     /* An array of pointers to the matrix row */
1825   clan_int_t * p_Init; /* The matrix rows contiguously in memory */
1826   int p_Init_size;     /* Initial size of p_Init */
1827 @};
1828 typedef struct clan_matrix   clan_matrix_t;
1829 typedef struct clan_matrix * clan_matrix_p;
1830 @end group
1831 @end example
1832
1833 @noindent The whole matrix is stored in memory row after row at the
1834 @code{p_Init} address. @code{p} is an array of pointers where
1835 @code{p[i]} points to the first element of the @math{i^{th}} row.
1836 @code{NbRows} and @code{NbColumns} are respectively the number of
1837 rows and columns of the matrix.
1838
1839 @noindent To be able to provide different precision versions (Clan
1840 supports 32 bits, 64 bits and arbitrary precision through the GMP library),
1841 the @code{clan_int_t} type depends on the configuration options (it may be
1842 @code{long int} for 32 bits version, @code{long long int} for 64 bits version,
1843 and @code{mpz_t} for multiple precision version).
1844 The @code{p_Init_size} field is needed to free
1845 the memory allocated by @code{mpz_init} in the multiple precision version.
1846 Set this field to the initial size of the @code{p_Init} array
1847 (its size may vary during processing, for instance if you are using PolyLib).
1848
1849
1850 @node clan_matrix_list_t
1851 @subsection clan_matrix_list_t
1852
1853 @example
1854 @group
1855 struct clan_matrix_list
1856 @{
1857   clan_matrix_p elt;             /* An element of the list. */
1858   struct clan_matrix_list* next; /* Pointer to the next element. */
1859 @};
1860 typedef struct clan_matrix_list   clan_matrix_list_t;
1861 typedef struct clan_matrix_list * clan_matrix_list_p;
1862 @end group
1863 @end example
1864
1865 @noindent The @code{clan_matrix_list_t} structure is a @code{NULL}-terminated
1866 linked list of @code{clan_matrix_t} data structures. It is used for instance
1867 to represent a union of convex domains (each matrix representing a convex
1868 part of the union). @code{elt} is a matrix element of the list and
1869 @code{next} is the pointer to the next element of the list.
1870
1871
1872 @node clan_statement_t
1873 @subsection clan_statement_t
1874 @example
1875 @group
1876 struct clan_statement
1877 @{
1878   clan_matrix_list_p domain;    /* Iteration domain */
1879   clan_matrix_p scattering;     /* Scattering function */
1880   clan_matrix_p read;           /* Array read access informations */
1881   clan_matrix_p write;          /* Array write access informations */
1882   int nb_iterators;             /* Original depth of the statement */
1883   char ** iterators;            /* Array of iterator names */
1884   char * body;                  /* Original statement body */
1885   struct clan_statement * next; /* Next statement in the linked list */
1886 @};
1887 typedef struct clan_statement   clan_statement_t;
1888 typedef struct clan_statement * clan_statement_p;
1889 @end group
1890 @end example
1891
1892 @noindent The @code{clan_statement_t} structure represents a @code{NULL}
1893 terminated linked list of statements. The structure contains all
1894 elements of the polyhedral representation of the statement. It uses a
1895 @code{clan_matrix_list_t} data structure to represent a union for the
1896 iteration domain @code{domain} (it is simply a linked list of
1897 @code{clan_matrix_t} elements), and a @code{clan_matrix_t} data
1898 structure for the rest: the scattering function @code{scattering}, the
1899 set of array read accesses @code{read} and the set of array write
1900 accesses @code{write}.  The particular representation of each element
1901 using the @code{clan_matrix_t} data structure is described with the
1902 output file format (@pxref{Reading The Output File}). If an element is
1903 not present, the according pointer have to be @code{NULL}.  The
1904 @code{nb_iterators} field gives the depth of the statement in the
1905 original program. It may be used with any matrix to compute the number
1906 of parameters (e.g.  @code{nb_parameters = domain->NbColumns -
1907 nb_iterators - 2}) To represent the statement body, we use
1908 @code{iterators}, an array of @code{nb_iterators} strings for the
1909 surrounding loop counters names in the original program, and
1910 @code{body}, the statement body string in the original program.
1911
1912 @node clan_scop_t
1913 @subsection clan_scop_t
1914 @example
1915 @group
1916 struct clan_scop
1917 @{
1918   clan_matrix_p context;      /* Constraints on the SCoP parameters */
1919   int nb_parameters;          /* Number of parameters for the SCoP */
1920   char ** parameters;         /* Array of parameter names */
1921   int nb_arrays;              /* Number of arrays accessed in the SCoP */
1922   char ** arrays;             /* Array of array names */
1923   clan_statement_p statement; /* Statement list of the SCoP */
1924   char * optiontags;          /* The content (as a 0 terminated
1925                                  string) of the optional tags. */
1926   void * usr;                 /* A user-defined field,
1927                                  not touched by clan. */
1928 @};
1929 typedef struct clan_scop   clan_scop_t;
1930 typedef struct clan_scop * clan_scop_p;
1931 @end group
1932 @end example
1933
1934 @noindent @code{clan_scop_t} stores the useful informations of a static
1935 control part of a program to process it within a polyhedral framework.
1936 It contains the informations about the context (what is common to all
1937 statements in the SCoP) and the list of statements @code{statement}.
1938 The context is made of the constraints on the global parameters
1939 @code{context}. The representation of the context using the
1940 @code{clan_matrix_t} data structure is described with the output file
1941 format (this is a domain, @pxref{Reading The Output File}).  The list of
1942 parameter names is provided as an array of @code{nb_parameters} strings
1943 called @code{parameters} (@code{nb_parameters} is somewhat redundant as
1944 it is supposed to be equal to @code{context->NbColumns - 2}). The list
1945 of array names is provided as an array of @code{nb_arrays} strings
1946 called @code{arrays}. Each accessed array in the SCoP has a unique
1947 identifier (a strictly positive number, @pxref{Access Representation}),
1948 @code{arrays} provides the correspondance between an identifier and the
1949 real name of the accessed array. If an array has the identifier
1950 @samp{id}, then its real name is @samp{arrays[id - 1]}. The
1951 @code{optiontags} field contains the remainder of the SCoP description
1952 file, as a @code{char*} string. Optional tags specified in the
1953 @samp{Options} section are stored there. Finally, the @code{usr} field
1954 is a pointer for library users convenience. This field is not touched by
1955 Clan.
1956
1957 As an example, let us consider again the matrix-matrix multiply program
1958 (@pxref{A First Example}).
1959 The next figure gives a possible representation in memory for this
1960 SCoP thanks to the Clan data structures (it has been actually printed
1961 by the @code{clan_scop_print} function, it is also possible to ask Clan to
1962 output the internal representation using the command line option
1963 @code{-structure}):
1964
1965 @smallexample
1966 +-- clan_scop_t
1967 |       |
1968 |       +-- clan_matrix_t
1969 |       |       0 3
1970 |       |
1971 |       +-- Original parameters strings: N
1972 |       |
1973 |       +-- Accessed array strings: c a b
1974 |       |
1975 |       +-- clan_statement_t (S1)
1976 |       |       |
1977 |       |       +-- clan_matrix_list_t
1978 |       |       |       |
1979 |       |       |       +-- clan_matrix_t
1980 |       |       |       |       4 5
1981 |       |       |       |       [    1    1    0    0    0 ]
1982 |       |       |       |       [    1   -1    0    1   -1 ]
1983 |       |       |       |       [    1    0    1    0    0 ]
1984 |       |       |       |       [    1    0   -1    1   -1 ]
1985 |       |       |       |
1986 |       |       |
1987 |       |       +-- clan_matrix_t
1988 |       |       |       5 5
1989 |       |       |       [    0    0    0    0    0 ]
1990 |       |       |       [    0    1    0    0    0 ]
1991 |       |       |       [    0    0    0    0    0 ]
1992 |       |       |       [    0    0    1    0    0 ]
1993 |       |       |       [    0    0    0    0    0 ]
1994 |       |       |
1995 |       |       +-- NULL matrix
1996 |       |       |
1997 |       |       +-- clan_matrix_t
1998 |       |       |       2 5
1999 |       |       |       [    1    1    0    0    0 ]
2000 |       |       |       [    0    0    1    0    0 ]
2001 |       |       |
2002 |       |       +-- Original iterator strings: i j
2003 |       |       |
2004 |       |       +-- Original body: c[i][j]=0.0;
2005 |       |       |
2006 |       |       V
2007 |       |   clan_statement_t (S2)
2008 |       |       |
2009 |       |       +-- clan_matrix_list_t
2010 |       |       |       |
2011 |       |       |       +-- clan_matrix_t
2012 |       |       |       |       6 6
2013 |       |       |       |       [    1    1    0    0    0    0 ]
2014 |       |       |       |       [    1   -1    0    0    1   -1 ]
2015 |       |       |       |       [    1    0    1    0    0    0 ]
2016 |       |       |       |       [    1    0   -1    0    1   -1 ]
2017 |       |       |       |       [    1    0    0    1    0    0 ]
2018 |       |       |       |       [    1    0    0   -1    1   -1 ]
2019 |       |       |       |
2020 |       |       |
2021 |       |       +-- clan_matrix_t
2022 |       |       |       7 6
2023 |       |       |       [    0    0    0    0    0    0 ]
2024 |       |       |       [    0    1    0    0    0    0 ]
2025 |       |       |       [    0    0    0    0    0    0 ]
2026 |       |       |       [    0    0    1    0    0    0 ]
2027 |       |       |       [    0    0    0    0    0    1 ]
2028 |       |       |       [    0    0    0    1    0    0 ]
2029 |       |       |       [    0    0    0    0    0    0 ]
2030 |       |       |
2031 |       |       +-- clan_matrix_t
2032 |       |       |       6 6
2033 |       |       |       [    1    1    0    0    0    0 ]
2034 |       |       |       [    0    0    1    0    0    0 ]
2035 |       |       |       [    2    1    0    0    0    0 ]
2036 |       |       |       [    0    0    0    1    0    0 ]
2037 |       |       |       [    3    0    0    1    0    0 ]
2038 |       |       |       [    0    0    1    0    0    0 ]
2039 |       |       |
2040 |       |       +-- clan_matrix_t
2041 |       |       |       2 6
2042 |       |       |       [    1    1    0    0    0    0 ]
2043 |       |       |       [    0    0    1    0    0    0 ]
2044 |       |       |
2045 |       |       +-- Original iterator strings: i j k
2046 |       |       |
2047 |       |       +-- Original body: c[i][j]=c[i][j]+a[i][k]*b[k][j];
2048 |       |       |
2049 |       |
2050 |
2051 @end smallexample
2052
2053
2054 @node clan_options_t
2055 @subsection clan_options_t
2056 @example
2057 @group
2058 struct clan_options
2059 @{
2060   char * name ;  /* Name of the input file. */
2061 @};
2062 typedef struct clan_options   clan_options_t;
2063 typedef struct clan_options * clan_options_p;
2064 @end group
2065 @end example
2066
2067 @noindent The @code{clan_options_t} structure contains all the possible
2068 options to rule Clan's behaviour (@pxref{Calling Clan}). For the moment there
2069 are mainly internal options, but it's going to change in the future.
2070
2071
2072 @node Clan Functions
2073 @section Clan Functions Description
2074
2075 @menu
2076 * clan_scop_extract::
2077 * clan_scop_print_dot_scop::
2078 * clan_scop_read::
2079 * clan_scop_tag_content::
2080 * Allocation and Initialization Functions::
2081 * Memory Deallocation Functions::
2082 * Printing Functions::
2083 @end menu
2084
2085
2086 @node clan_scop_extract
2087 @subsection clan_scop_extract
2088 @example
2089 @group
2090 clan_scop_p clan_scop_extract(FILE * input, clan_options_p options);
2091 @end group
2092 @end example
2093
2094 @noindent The @code{clan_scop_extract} function extracts the polyhedral
2095 representation of a SCoP in the file provided thanks to the @code{input}
2096 pointer (the file, possibly @code{stdin}, has to be open for reading),
2097 according to some options
2098 provided thanks to the pointer @code{options} to a @code{clan_options_t}
2099 data structure (@pxref{clan_options_t}). It returns a pointer to the
2100 extracted SCoP, translated into
2101 a @code{clan_scop_t} data structure (@pxref{clan_scop_t}).
2102
2103 @node clan_scop_print_dot_scop
2104 @subsection clan_scop_print_dot_scop
2105 @example
2106 @group
2107 void clan_scop_print_dot_scop
2108 (
2109   FILE * output,
2110   clan_scop_p scop,
2111   clan_options_p options
2112 );
2113 @end group
2114 @end example
2115
2116 @noindent The function @code{clan_scop_print_dot_scop} is a pretty printer for
2117 @code{clan_scop_t} structures. It dumps the @code{scop} informations in
2118 .scop format (@pxref{Reading The Output File}) in the file provided thanks to
2119 the pointer @code{output} (the file, possibly @code{stdout}, has to be open
2120 for writing), according to some options provided thanks to the pointer
2121 @code{options} to a @code{clan_options_t} data structure
2122 (@pxref{clan_options_t}).
2123
2124
2125
2126 @node clan_scop_read
2127 @subsection clan_scop_read
2128 @example
2129 @group
2130 clan_scop_p clan_scop_read
2131 (
2132   FILE * input,
2133   clan_options_p options
2134 );
2135 @end group
2136 @end example
2137
2138 @noindent The function @code{clan_scop_read} reads a .scop file from
2139 the standard input, and returns a pointer on a freshly allocated
2140 @code{clan_scop_t} structure containing the SCoP information.
2141
2142
2143 @node clan_scop_tag_content
2144 @subsection clan_scop_tag_content
2145 @example
2146 @group
2147 char* clan_scop_tag_content
2148 (
2149   clan_scop_p scop,
2150   char* from,
2151   char* to
2152 );
2153 @end group
2154 @end example
2155
2156 @noindent The function @code{clan_scop_tag_content} reads the list of
2157 optional tags for the @code{clan_scop_t} (stored in the
2158 @code{optiontags} string), and returns a freshly allocated string of all
2159 characters between the two given strings @code{from} and @code{to}. If
2160 one or the other given strings are not found, or if there was no
2161 optional part specified in the .scop, @code{NULL} is returned.
2162
2163
2164 @node Allocation and Initialization Functions
2165 @subsection Allocation and Initialization Functions
2166 @example
2167 clan_structure_p clan_structure_malloc();
2168 @end example
2169 @noindent Each Clan data structure has an allocation and initialization
2170 function as shown above, where @code{structure} have to
2171 be replaced by the name of the convenient structure (without @samp{clan}
2172 prefix and @samp{_t} suffix) for
2173 instance @code{clan_scop_p clan_scop_malloc();}. These functions return
2174 pointers to an allocated structure with fields set to convenient default
2175 values. @strong{Using those functions is mandatory} to support internal
2176 management fields and to avoid upward compatibility problems if
2177 new fields appear. An exception is @code{clan_matrix_malloc} since the
2178 @code{clan_matrix_t} needs two parameters:
2179 the number of rows and columns of the matrix we want to allocate:
2180 @example
2181 clan_matrix_p clan_matrix_malloc(unsigned nbrows, unsigned nbcolumns);
2182 @end example
2183
2184
2185 @node Memory Deallocation Functions
2186 @subsection Memory Deallocation Functions
2187 @example
2188 void clan_structure_free(clan_structure_p);
2189 @end example
2190 @noindent Each Clan data structure has a deallocation function as shown above,
2191 where @code{structure} have to
2192 be replaced by the name of the convenient structure (without @samp{clan}
2193 prefix and @samp{_t} suffix) for
2194 instance @code{void clan_scop_free(clan_scop_p);}. These functions
2195 free the allocated memory for the structure provided as input. They free
2196 memory recursively, i.e. they also free the allocated memory for the internal
2197 structures.
2198 @strong{Using those functions is mandatory} to avoid memory leaks on internal
2199 management fields and to avoid upward compatibility problems if
2200 new fields appear.
2201
2202
2203 @node Printing Functions
2204 @subsection Printing Functions
2205 @example
2206 void clan_structure_print(FILE *, clan_structure_p) ;
2207 @end example
2208 @noindent Each Clan data structure has a printing function as shown above,
2209 where @code{structure} have to
2210 be replaced by the name of the convenient structure (without @samp{clan}
2211 prefix and @samp{_t} suffix) for
2212 instance @code{void clan_scop_print(FILE *, clan_scop_p);}. These functions
2213 print the pointed structure (and its fields recursively) to the file provided
2214 as input (the file, possibly @code{stdout}, has to be open
2215 for writing).
2216
2217
2218 @node Example of Library Utilization
2219 @section Example of Library Utilization
2220 Here is a basic example showing how it is possible to use the Clan library,
2221 assuming that a standard installation has been done.
2222 The following C program reads a source code input file on the standard input,
2223 then prints the solution on the standard output.
2224 Options are preselected to the default values of the Clan software.
2225 @example
2226 /* example.c */
2227 # include <stdio.h>
2228 # include <clan/clan.h>
2229
2230 int main()
2231 @{
2232   clan_scop_p scop;
2233   clan_options_p options;
2234
2235   /* Default option setting. */
2236   options = clan_options_malloc() ;
2237
2238   /* Extraction of the SCoP. */
2239   scop = clan_scop_extract(stdin, options);
2240
2241   /* Output of the .scop file. */
2242   clan_scop_print_dot_scop(stdout, scop, options);
2243
2244   /* Save the planet. */
2245   clan_options_free(options);
2246   clan_scop_free(scop);
2247
2248   return 0;
2249 @}
2250 @end example
2251
2252 @noindent The compilation command could be:
2253 @example
2254 gcc example.c -lclan -o example
2255 @end example
2256 @noindent A calling command with the input file test.c could be:
2257 @example
2258 more test.c | ./example
2259 @end example
2260
2261
2262
2263 @c %  ****************************** INSTALLING ********************************
2264 @node Installing
2265 @chapter Installing Clan
2266
2267 @menu
2268 * License::
2269 * Requirements::
2270 * Basic Installation::
2271 * Optional Features::
2272 * Uninstallation::
2273 @end menu
2274
2275 @node License
2276 @section License
2277 First of all, it would be very kind to refer the following paper in any
2278 publication that result from the use of the Clan software or its library,
2279 @pxref{Bas03} (a bibtex entry is provided behind the title page of this
2280 manual, along with copyright notice).
2281
2282 This program is free software; you can redistribute it and/or
2283 modify it under the terms of the GNU Lesser General Public License
2284 as published by the Free Software Foundation,
2285 either version 3 of the  License, or (at your option) any later version.
2286 This program is distributed in the hope that it will be useful,
2287 but WITHOUT ANY WARRANTY; without even the implied warranty of
2288 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
2289 GNU General Public License for more details.
2290 @code{http://www.gnu.org/copyleft/lgpl.html}
2291
2292
2293 @node Requirements
2294 @section Requirements
2295
2296
2297 Clan is a stand-alone tool and library. For a basic use, it does not need any
2298 additional tool or library. Anyway, to be able to work in conjunction with
2299 other tools that manipulate multiple precision numbers, the GNU GMP library
2300 can be used as an option.
2301
2302
2303 @menu
2304 * GMP Library::
2305 @end menu
2306
2307
2308 @node GMP Library
2309 @subsection GMP Library (optional)
2310
2311 To be able to deal with insanely large coefficient, the user will need to
2312 install the GNU Multiple Precision Library (GMP for short) version 4.2.2
2313 or above. It can be freely downloaded from @code{http://www.swox.com/gmp}.
2314 The user can compile it by typing the following commands on the GMP root
2315 directory:
2316
2317 @itemize @bullet
2318 @item @code{./configure}
2319 @item @code{make}
2320 @item And as root: @code{make install}
2321 @end itemize
2322
2323 The GMP default installation is @code{/usr/local}. This directory may
2324 not be inside your library path. To fix the problem, the user should set
2325 @example
2326 export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
2327 @end example
2328 @noindent if your shell is, e.g., bash or
2329 @example
2330 setenv LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/lib
2331 @end example
2332 @noindent if your shell is, e.g., tcsh. Add the line to your .bashrc or .tcshrc (or
2333 whatever convenient file) to make this change permanent. Another solution
2334 is to ask GMP to install in the standard path by using the prefix
2335 option of the configure script:
2336 @samp{./configure --prefix=/usr}.
2337
2338 Clan has to be built using the GMP library by specifying the convenient
2339 configure script options to buid the GMP version (@pxref{Optional Features}).
2340
2341
2342 @node Basic Installation
2343 @section Clan Basic Installation
2344
2345 Once downloaded and unpacked
2346 (e.g. using the @samp{tar -zxvf clan-@value{VERSION}.tar.gz} command),
2347 you can compile Clan by typing the following commands on the Clan's root
2348 directory:
2349
2350 @itemize @bullet
2351 @item @code{./configure}
2352 @item @code{make}
2353 @item And as root: @code{make install}
2354 @end itemize
2355
2356 The program binaries and object files can be removed from the
2357 source code directory by typing @code{make clean}. To also remove the
2358 files that the @code{configure} script created (so you can compile the
2359 package for a different kind of computer) type @code{make distclean}.
2360
2361
2362 @node Optional Features
2363 @section Optional Features
2364 The @code{configure} shell script attempts to guess correct values for
2365 various system-dependent variables and user options used during compilation.
2366 It uses those values to create the @code{Makefile}. Various user options
2367 are provided by the Clan's configure script. They are summarized in the
2368 following list and may be printed by typing @code{./configure --help} in the
2369 Clan top-level directory.
2370
2371 @itemize @bullet
2372 @item By default, the installation directory is @code{/usr/local}:
2373 @code{make install} will install the package's files in
2374 @code{/usr/local/bin}, @code{/usr/local/lib} and @code{/usr/local/include}.
2375 The user can specify an installation prefix other than @code{/usr/local} by
2376 giving @code{configure} the option @code{--prefix=PATH}.
2377
2378 @item By default, Clan is built in 64bits version. If the user give to
2379 @code{configure} the option
2380 @code{--enable-int-version}, the 32bits version of Clan will be compiled.
2381 In the same way, the option @code{--enable-mp-version} have to be used to
2382 build the multiple precision version.
2383
2384 @item By default, @code{configure} will look for the GMP library
2385 (necessary to build the multiple precision version) in standard
2386 locations. If necessary, the user can specify the GMP path by giving
2387 @code{configure} the option @code{--with-gmp=PATH}.
2388 @end itemize
2389
2390 @node Uninstallation
2391 @section Uninstallation
2392 The user can easily remove the Clan software and library from his system
2393 by typing (as root if necessary) from the Clan top-level directory
2394 @code{make uninstall}.
2395
2396 @c %  **************************** DOCUMENTATION ******************************
2397 @node Documentation
2398 @chapter Documentation
2399 The Clan distribution provides several documentation sources. First, the
2400 source code itself is as documented as possible. The code comments use a
2401 Doxygen-compatible presentation (something similar to what JavaDoc does for
2402 JAVA). The user may install Doxygen
2403 (see @code{http://www.stack.nl/~dimitri/doxygen}) to automatically
2404 generate a technical documentation by typing @code{make doc} or
2405 @code{doxygen ./autoconf/Doxyfile} at the Clan top-level directory after
2406 running the configure script (@pxref{Installing}). Doxygen will generate
2407 documentation sources (in HTML, LaTeX and man) in the @code{doc/source}
2408 directory of the Clan distribution.
2409
2410 The Texinfo sources of the present document are also provided in the @code{doc}
2411 directory. You can build it in either DVI format (by typing
2412 @code{texi2dvi cloog.texi}) or PDF format
2413 (by typing @code{texi2pdf cloog.texi}) or HTML format
2414 (by typing @code{makeinfo --html cloog.texi}, using @code{--no-split}
2415 option to generate a single HTML file) or info format
2416 (by typing @code{makeinfo cloog.texi}).
2417
2418 @c %  ****************************** REFERENCES ********************************
2419 @node References
2420 @chapter References
2421
2422 @itemize
2423 @item
2424 @anchor{Bas03a}[Bas03a] C. Bastoul, P. Feautrier. Improving data locality
2425 by chunking. CC'12 International Conference on Compiler Construction,
2426 LNCS 2622, pages 320-335, Warsaw, april 2003.
2427
2428 @item
2429 @anchor{Bas03}[Bas03] C. Bastoul and A. Cohen and S. Girbal and
2430 S. Sharma and O. Temam. Putting Polyhedral Loop Transformations to
2431 Work, LCPC'16 International Workshop on Languages and
2432 Compilers for Parallel Computers, LNCS 2958, pages 209--225,
2433 College Station, Texas, october 2003.
2434
2435 @item
2436 @anchor{Fea92}[Fea92] P. Feautrier Some efficient solutions to the affine
2437 scheduling problem, part II: multidimensional time.
2438 International Journal of Parallel Programming, 21(6):389--420, December 1992.
2439
2440 @item
2441 @anchor{Gri04}[Gri04] M. Griebl. Automatic parallelization of loop programs
2442 for distributed memory architectures. Habilitation Thesis. Facult@"at f@"ur
2443 Mathematik und Informatik, Universit@"at Passau, 2004.
2444 @emph{http://www.infosun.fmi.uni-passau.de/cl/loopo/}
2445
2446 @item
2447 @anchor{Wil93}[Wil93] Doran K. Wilde.
2448 A library for doing polyhedral operations.
2449 Technical Report 785, IRISA, Rennes, France, 1993.
2450
2451 @end itemize
2452
2453
2454
2455
2456 @c % /*************************************************************************
2457 @c %  *                       PART VI: END OF THE DOCUMENT                    *
2458 @c %  *************************************************************************/
2459 @c @unnumbered Index
2460
2461 @c @printindex cp
2462
2463 @bye