gcc/ada/exp_unst.ads

   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT COMPILER COMPONENTS                         --
   4 --                                                                          --
   5 --                             E X P _ U N S T                              --
   6 --                                                                          --
   7 --                                 S p e c                                  --
   8 --                                                                          --
   9 --          Copyright (C) 2014-2015, Free Software Foundation, Inc.         --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 3,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
  17 -- for  more details.  You should have  received  a copy of the GNU General --
  18 -- Public License  distributed with GNAT; see file COPYING3.  If not, go to --
  19 -- http://www.gnu.org/licenses for a complete copy of the license.          --
  20 --                                                                          --
  21 -- GNAT was originally developed  by the GNAT team at  New York University. --
  22 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  23 --                                                                          --
  24 ------------------------------------------------------------------------------
  25
  26 --  Expand routines for unnesting subprograms
  27
  28 with Types; use Types;
  29
  30 package Exp_Unst is
  31
  32    --  -----------------
  33    --  -- The Problem --
  34    --  -----------------
  35
  36    --  Normally, nested subprograms in the source result in corresponding
  37    --  nested subprograms in the resulting tree. We then expect the back end
  38    --  to handle such nested subprograms, including all cases of uplevel
  39    --  references. For example, the GCC back end can do this relatively easily
  40    --  since GNU C (as an extension) allows nested functions with uplevel
  41    --  references, and implements an appropriate static chain approach to
  42    --  dealing with such uplevel references.
  43
  44    --  However, we also want to be able to interface with back ends that do
  45    --  not easily handle such uplevel references. One example is the back end
  46    --  that translates the tree into standard C source code. In the future,
  47    --  other back ends might need the same capability (e.g. a back end that
  48    --  generated LLVM intermediate code).
  49
  50    --  We could imagine simply handling such references in the appropriate
  51    --  back end. For example the back end that generates C could recognize
  52    --  nested subprograms and rig up some way of translating them, e.g. by
  53    --  making a static-link source level visible.
  54
  55    --  Rather than take that approach, we prefer to do a semantics-preserving
  56    --  transformation on the GNAT tree, that eliminates the problem before we
  57    --  hand the tree over to the back end. There are two reasons for preferring
  58    --  this approach:
  59
  60    --     First: the work needs only to be done once for all affected back ends
  61    --     and we can remain within the semantics of the tree. The front end is
  62    --     full of tree transformations, so we have all the infrastructure for
  63    --     doing transformations of this type.
  64
  65    --     Second: given that the transformation will be semantics-preserving,
  66    --     we can still used the standard GCC back end to build code from it.
  67    --     This means we can easily run our full test suite to verify that the
  68    --     transformations are indeed semantics preserving. It is a lot more
  69    --     work to thoroughly test the output of specialized back ends.
  70
  71    --  Looking at the problem, we have three situations to deal with. Note
  72    --  that in these examples, we use all lower case, since that is the way
  73    --  the internal tree is cased.
  74
  75    --     First, cases where there are no uplevel references, for example
  76
  77    --       procedure case1 is
  78    --          function max (m, n : Integer) return integer is
  79    --          begin
  80    --             return integer'max (m, n);
  81    --          end max;
  82    --          ...
  83    --       end case1;
  84
  85    --     Second, cases where there are explicit uplevel references.
  86
  87    --       procedure case2 (b : integer) is
  88    --          procedure Inner (bb : integer);
  89    --
  90    --          procedure inner2 is
  91    --          begin
  92    --            inner(5);
  93    --          end;
  94    --
  95    --          x  : integer := 77;
  96    --          y  : constant integer := 15 * 16;
  97    --          rv : integer := 10;
  98    --
  99    --          procedure inner (bb : integer) is
 100    --          begin
 101    --             x := rv + y + bb + b;
 102    --          end;
 103    --
 104    --       begin
 105    --          inner2;
 106    --       end case2;
 107
 108    --     In this second example, B, X, RV are uplevel referenced. Y is not
 109    --     considered as an uplevel reference since it is a static constant
 110    --     where references are replaced by the value at compile time.
 111
 112    --   Third, cases where there are implicit uplevel references via types
 113    --   whose bounds depend on locally declared constants or variables:
 114
 115    --       function case3 (x, y : integer) return boolean is
 116    --          subtype dynam is integer range x .. y + 3;
 117    --          subtype static is integer range 42 .. 73;
 118    --          xx : dynam := y;
 119    --
 120    --          type darr is array (dynam) of Integer;
 121    --          type darec is record
 122    --             A : darr;
 123    --             B : integer;
 124    --          end record;
 125    --          darecv : darec;
 126    --
 127    --          function inner (b : integer) return boolean is
 128    --          begin
 129    --            return b in dynam and then darecv.b in static;
 130    --          end inner;
 131    --
 132    --       begin
 133    --         return inner (42) and then inner (xx * 3 - y * 2);
 134    --       end case3;
 135    --
 136    --     In this third example, the membership test implicitly references the
 137    --     the bounds of Dynam, which both involve uplevel references.
 138
 139    --  ------------------
 140    --  -- The Solution --
 141    --  ------------------
 142
 143    --  Looking at the three cases above, the first case poses no problem at
 144    --  all. Indeed the subprogram could have been declared at the outer level
 145    --  (perhaps changing the name). But this style is quite common as a way
 146    --  of limiting the scope of a local procedure called only within the outer
 147    --  procedure. We could move it to the outer level (with a name change if
 148    --  needed), but we don't bother. We leave it nested, and the back end just
 149    --  translates it as though it were not nested.
 150
 151    --  In general we leave nested procedures nested, rather than trying to move
 152    --  them to the outer level (the back end may do that, e.g. as part of the
 153    --  translation to C, but we don't do it in the tree itself). This saves a
 154    --  LOT of trouble in terms of visibility and semantics.
 155
 156    --  But of course we have to deal with the uplevel references. The idea is
 157    --  to rewrite these nested subprograms so that they no longer have any such
 158    --  uplevel references, so by the time they reach the back end, they all are
 159    --  case 1 (no uplevel references) and thus easily handled.
 160
 161    --  To deal with explicit uplevel references (case 2 above), we proceed with
 162    --  the following steps:
 163
 164    --    All entities marked as being uplevel referenced are marked as aliased
 165    --    since they will be accessed indirectly via an activation record as
 166    --    described below.
 167
 168    --    An activation record is created containing system address values
 169    --    for each uplevel referenced entity in a given scope. In the example
 170    --    given before, we would have:
 171
 172    --      type AREC1T is record
 173    --         b  : Address;
 174    --         x  : Address;
 175    --         rv : Address;
 176    --      end record;
 177
 178    --      AREC1 : aliased AREC1T;
 179
 180    --      type AREC1PT is access all AREC1T;
 181    --      AREC1P : constant AREC1PT := AREC1'Access;
 182
 183    --   The fields of AREC1 are set at the point the corresponding entity
 184    --   is declared (immediately for parameters).
 185
 186    --   Note: the 1 in all these names represents the fact that we are at the
 187    --   outer level of nesting. As we will see later, deeper levels of nesting
 188    --   will use AREC2, AREC3, ...
 189
 190    --   For all subprograms nested immediately within the corresponding scope,
 191    --   a parameter AREC1F is passed, and all calls to these routines have
 192    --   AREC1P added as an additional formal.
 193
 194    --   Now within the nested procedures, any reference to an uplevel entity
 195    --   xxx is replaced by Tnn!(AREC1.xxx).all (where ! represents a call
 196    --   to unchecked conversion to convert the address to the access type
 197    --   and Tnn is a locally declared type that is "access all t", where t
 198    --   is the type of the reference).
 199
 200    --   Note: the reason that we use Address as the component type in the
 201    --   declaration of AREC1T is that we may create this type before we see
 202    --   the declaration of this type.
 203
 204    --   The following shows example 2 above after this translation:
 205
 206    --       procedure case2x (b : aliased Integer) is
 207    --          type AREC1T is record
 208    --             b  : Address;
 209    --             x  : Address;
 210    --             rv : Address;
 211    --          end record;
 212    --
 213    --          AREC1 : aliased AREC1T;
 214    --          type AREC1PT is access all AREC1T;
 215    --          AREC1P : constant AREC1PT := AREC1'Access;
 216    --
 217    --          AREC1.b := b'Address;
 218    --
 219    --          procedure inner (bb : integer; AREC1F : AREC1PT);
 220    --
 221    --          procedure inner2 (AREC1F : AREC1PT) is
 222    --          begin
 223    --            inner(5, AREC1F);
 224    --          end;
 225    --
 226    --          x  : aliased integer := 77;
 227    --          AREC1.x := X'Address;
 228    --
 229    --          y  : constant Integer := 15 * 16;
 230    --
 231    --          rv : aliased Integer;
 232    --          AREC1.rv := rv'Address;
 233    --
 234    --          procedure inner (bb : integer; AREC1F : AREC1PT) is
 235    --          begin
 236    --             type Tnn1 is access all Integer;
 237    --             type Tnn2 is access all Integer;
 238    --             type Tnn3 is access all Integer;
 239    --             Tnn1!(AREC1F.x).all :=
 240    --               Tnn2!(AREC1F.rv).all + y + b + Tnn3!(AREC1F.b).all;
 241    --          end;
 242    --
 243    --       begin
 244    --          inner2 (AREC1P);
 245    --       end case2x;
 246
 247    --  And now the inner procedures INNER2 and INNER have no uplevel references
 248    --  so they have been reduced to case 1, which is the case easily handled by
 249    --  the back end. Note that the generated code is not strictly legal Ada
 250    --  because of the assignments to AREC1 in the declarative sequence, but the
 251    --  GNAT tree always allows such mixing of declarations and statements, so
 252    --  the back end must be prepared to handle this in any case.
 253
 254    --  Case 3 where we have uplevel references to types is a bit more complex.
 255    --  That would especially be the case if we did a full transformation that
 256    --  completely eliminated such uplevel references as we did for case 2. But
 257    --  instead of trying to do that, we rewrite the subprogram so that the code
 258    --  generator can easily detect and deal with these uplevel type references.
 259
 260    --  First we distinguish two cases
 261
 262    --    Static types are one of the two following cases:
 263
 264    --      Discrete types whose bounds are known at compile time. This is not
 265    --      quite the same as what is tested by Is_OK_Static_Subtype, in that
 266    --      it allows compile time known values that are not static expressions.
 267
 268    --      Composite types, whose components are (recursively) static types.
 269
 270    --    Dynamic types are one of the two following cases:
 271
 272    --      Discrete types with at least one bound not known at compile time.
 273
 274    --      Composite types with at least one component that is (recursively)
 275    --      a dynamic type.
 276
 277    --    Uplevel references to static types are not a problem, the front end
 278    --    or the code generator fetches the bounds as required, and since they
 279    --    are compile time known values, this value can just be extracted and
 280    --    no actual uplevel reference is required.
 281
 282    --    Uplevel references to dynamic types are a potential problem, since
 283    --    such references may involve an implicit access to a dynamic bound,
 284    --    and this reference is an implicit uplevel access.
 285
 286    --    To fully unnest such references would be messy, since we would have
 287    --    to create local copies of the dynamic types involved, so that the
 288    --    front end or code generator could generate an explicit uplevel
 289    --    reference to the bound involved. Rather than do that, we set things
 290    --    up so that this situation can be easily detected and dealt with when
 291    --    there is an implicit reference to the bounds.
 292
 293    --    What we do is to always generate a local constant for any dynamic
 294    --    bound in a dynamic subtype xx with name xx_FIRST or xx_LAST. The one
 295    --    case where we can skip this is where the bound is For
 296    --    example in the third example above, subtype dynam is expanded as
 297
 298    --      dynam_LAST  : constant Integer := y + 3;
 299    --      subtype dynam is integer range x .. dynam_LAST;
 300
 301    --    Now if type dynam is uplevel referenced (as it is this case), then
 302    --    the bounds x and dynam_LAST are marked as uplevel references
 303    --    so that appropriate entries are made in the activation record. Any
 304    --    explicit reference to such a bound in the front end generated code
 305    --    will be handled by the normal uplevel reference mechanism which we
 306    --    described above for case 2. For implicit references by a back end
 307    --    that needs to unnest things, any such implicit reference to one of
 308    --    these bounds can be replaced by an appropriate reference to the entry
 309    --    in the activation record for xx_FIRST or xx_LAST. Thus the back end
 310    --    can eliminate the problematical uplevel reference without the need to
 311    --    do the heavy tree modification to do that at the code expansion level
 312
 313    --  Looking at case 3 again, here is the normal -gnatG expanded code
 314
 315      --  function case3 (x : integer; y : integer) return boolean is
 316      --     dynam_LAST : constant integer := y {+} 3;
 317      --     subtype dynam is integer range x .. dynam_LAST;
 318      --     subtype static is integer range 42 .. 73;
 319      --
 320      --     [constraint_error when
 321      --       not (y in x .. dynam_LAST)
 322      --       "range check failed"]
 323      --
 324      --     xx : dynam := y;
 325      --
 326      --     type darr is array (x .. dynam_LAST) of integer;
 327      --     type darec is record
 328      --        a : darr;
 329      --        b : integer;
 330      --     end record;
 331      --     [type TdarrB is array (x .. dynam_LAST range <>) of integer]
 332      --     freeze TdarrB []
 333      --     darecv : darec;
 334      --
 335      --     function inner (b : integer) return boolean is
 336      --     begin
 337      --        return b in x .. dynam_LAST and then darecv.b in 42 .. 73;
 338      --     end inner;
 339      --  begin
 340      --     return inner (42) and then inner (xx {*} 3 {-} y {*} 2);
 341      --  end case3;
 342
 343    --  Note: the actual expanded code has fully qualified names so for
 344    --  example function inner is actually function case3__inner. For now
 345    --  we ignore that detail to clarify the examples.
 346
 347    --  Here we see that some of the bounds references are expanded by the
 348    --  front end, so that we get explicit references to y or dynamLast. These
 349    --  cases are handled by the normal uplevel reference mechanism described
 350    --  above for case 2. This is the case for the constraint check for the
 351    --  initialization of xx, and the range check in function inner.
 352
 353    --  But the reference darecv.b in the return statement of function
 354    --  inner has an implicit reference to the bounds of dynam, since to
 355    --  compute the location of b in the record, we need the length of a.
 356
 357    --  Here is the full translation of the third example:
 358
 359    --       function case3x (x, y : integer) return boolean is
 360    --          type AREC1T is record
 361    --             x          : Address;
 362    --             dynam_LAST : Address;
 363    --          end record;
 364    --
 365    --          AREC1 : aliased AREC1T;
 366    --          type AREC1PT is access all AREC1T;
 367    --          AREC1P : constant AREC1PT := AREC1'Access;
 368    --
 369    --          AREC1.x := x'Address;
 370    --
 371    --          dynam_LAST : constant integer := y {+} 3;
 372    --          AREC1.dynam_LAST := dynam_LAST'Address;
 373    --          subtype dynam is integer range x .. dynam_LAST;
 374    --          xx : dynam := y;
 375    --
 376    --          [constraint_error when
 377    --            not (y in x .. dynam_LAST)
 378    --            "range check failed"]
 379    --
 380    --          subtype static is integer range 42 .. 73;
 381    --
 382    --          type darr is array (x .. dynam_LAST) of Integer;
 383    --          type darec is record
 384    --             A : darr;
 385    --             B : integer;
 386    --          end record;
 387    --          darecv : darec;
 388    --
 389    --          function inner (b : integer; AREC1F : AREC1PT) return boolean is
 390    --          begin
 391    --             type Tnn is access all Integer
 392    --             return b in x .. Tnn!(AREC1F.dynam_LAST).all
 393    --               and then darecv.b in 42 .. 73;
 394    --          end inner;
 395    --
 396    --       begin
 397    --         return inner (42, AREC1P) and then inner (xx * 3, AREC1P);
 398    --       end case3x;
 399
 400    --  And now the back end when it processes darecv.b will access the bounds
 401    --  of darecv.a by referencing the d and dynam_LAST fields of AREC1P.
 402
 403    -----------------------------
 404    -- Multiple Nesting Levels --
 405    -----------------------------
 406
 407    --  In our examples so far, we have only nested to a single level, but the
 408    --  scheme generalizes to multiple levels of nesting and in this section we
 409    --  discuss how this generalization works.
 410
 411    --  Consider this example with two nesting levels
 412
 413    --  To deal with elimination of uplevel references, we follow the same basic
 414    --  approach described above for case 2, except that we need an activation
 415    --  record at each nested level. Basically the rule is that any procedure
 416    --  that has nested procedures needs an activation record. When we do this,
 417    --  the inner activation records have a pointer (uplink) to the immediately
 418    --  enclosing activation record, the normal arrangement of static links. The
 419    --  following shows the full translation of this fourth case.
 420
 421    --     function case4x (x : integer) return integer is
 422    --        type AREC1T is record
 423    --           v1 : Address;
 424    --        end record;
 425    --
 426    --        AREC1 : aliased AREC1T;
 427    --        type AREC1PT is access all AREC1T;
 428    --        AREC1P : constant AREC1PT := AREC1'Access;
 429    --
 430    --        v1 : integer := x;
 431    --        AREC1.v1 := v1'Address;
 432    --
 433    --        function inner1 (y : integer; AREC1F : AREC1PT) return integer is
 434    --           type AREC2T is record
 435    --              AREC1U : AREC1PT := AREC1F;
 436    --              v2     : Address;
 437    --           end record;
 438    --
 439    --           AREC2 : aliased AREC2T;
 440    --           type AREC2PT is access all AREC2T;
 441    --           AREC2P : constant AREC2PT := AREC2'Access;
 442    --
 443    --           type Tnn1 is access all Integer;
 444    --           v2 : integer := Tnn1!(AREC1F.v1).all {+} 1;
 445    --           AREC2.v2 := v2'Address;
 446    --
 447    --           function inner2
 448    --              (z : integer; AREC2F : AREC2PT) return integer
 449    --           is
 450    --           begin
 451    --              type Tnn1 is access all Integer;
 452    --              type Tnn2 is access all Integer;
 453    --              return integer(z {+}
 454    --                             Tnn1!(AREC2F.AREC1U.v1).all {+}
 455    --                             Tnn2!(AREC2F.v2).all);
 456    --           end inner2;
 457    --        begin
 458    --           type Tnn is access all Integer;
 459    --           return integer(y {+} inner2 (Tnn!(AREC1F.v1).all, AREC2P));
 460    --        end inner1;
 461    --     begin
 462    --        return inner1 (x, AREC1P);
 463    --     end case4x;
 464
 465    --  As can be seen in this example, the level number following AREC in the
 466    --  names avoids any confusion between AREC names at different levels.
 467
 468    -------------------------
 469    -- Name Disambiguation --
 470    -------------------------
 471
 472    --  As described above, the translation scheme would raise issues when the
 473    --  code generator did the actual unnesting if identically named nested
 474    --  subprograms exist. Similarly overloading would cause a naming issue.
 475
 476    --  In fact, the expanded code includes qualified names which eliminate this
 477    --  problem. We omitted the qualification from the exapnded examples above
 478    --  for simplicity. But to see this in action, consider this example:
 479
 480    --    function Mnames return Boolean is
 481    --       procedure Inner is
 482    --          procedure Inner is
 483    --          begin
 484    --             null;
 485    --          end;
 486    --       begin
 487    --          Inner;
 488    --       end;
 489    --       function F (A : Boolean) return Boolean is
 490    --       begin
 491    --          return not A;
 492    --       end;
 493    --       function F (A : Integer) return Boolean is
 494    --       begin
 495    --          return A > 42;
 496    --       end;
 497    --    begin
 498    --       Inner;
 499    --       return F (42) or F (True);
 500    --    end;
 501
 502    --  The expanded code actually looks like:
 503
 504    --    function mnames return boolean is
 505    --       procedure mnames__inner is
 506    --          procedure mnames__inner__inner is
 507    --          begin
 508    --             null;
 509    --             return;
 510    --          end mnames__inner__inner;
 511    --       begin
 512    --          mnames__inner__inner;
 513    --          return;
 514    --       end mnames__inner;
 515    --       function mnames__f (a : boolean) return boolean is
 516    --       begin
 517    --          return not a;
 518    --       end mnames__f;
 519    --       function mnames__f__2 (a : integer) return boolean is
 520    --       begin
 521    --          return a > 42;
 522    --       end mnames__f__2;
 523    --    begin
 524    --       mnames__inner;
 525    --       return mnames__f__2 (42) or mnames__f (true);
 526    --    end mnames;
 527
 528    --  As can be seen from studying this example, the qualification deals both
 529    --  with the issue of clashing names (mnames__inner, mnames__inner__inner),
 530    --  and with overloading (mnames__f, mnames__f__2).
 531
 532    -----------------
 533    -- Subprograms --
 534    -----------------
 535
 536    procedure Check_Uplevel_Reference_To_Type (Typ : Entity_Id);
 537    --  This procedure is called if Sem_Util.Check_Nested_Access detects an
 538    --  uplevel reference to a type or subtype entity Typ. On return there are
 539    --  two cases, if Typ is a static type (defined as a discrete type with
 540    --  static bounds, or a record all of whose components are of a static type,
 541    --  or an array whose index and component types are all static types), then
 542    --  the flag Is_Static_Type (Typ) will be set True, and in this case the
 543    --  flag Has_Uplevel_Reference is not set since we don't need to worry about
 544    --  uplevel references to static types. If on the other hand Typ is not a
 545    --  static type, then the flag Has_Uplevel_Reference will be set, and any
 546    --  non-static bounds referenced by the type will also be marked as having
 547    --  uplevel references (by setting Has_Uplevel_Reference for these bounds).
 548
 549    procedure Note_Uplevel_Reference (N : Node_Id; Subp : Entity_Id);
 550    --  Called in Unnest_Subprogram_Mode when we detect an explicit uplevel
 551    --  reference (node N) to an enclosing subprogram Subp.
 552
 553    procedure Unnest_Subprogram (Subp : Entity_Id; Subp_Body : Node_Id);
 554    --  Subp is a library level subprogram which has nested subprograms, and
 555    --  Subp_Body is the corresponding N_Subprogram_Body node. This procedure
 556    --  declares the AREC types and objects, adds assignments to the AREC record
 557    --  as required, defines the xxxPTR types for uplevel referenced objects,
 558    --  adds the ARECP parameter to all nested subprograms which need it, and
 559    --  modifies all uplevel references appropriately.
 560
 561 end Exp_Unst;