gcc/tree-call-cdce.c

   1 /* Conditional Dead Call Elimination pass for the GNU compiler.
   2    Copyright (C) 2008-2018 Free Software Foundation, Inc.
   3    Contributed by Xinliang David Li <davidxl@google.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "backend.h"
  25 #include "tree.h"
  26 #include "gimple.h"
  27 #include "cfghooks.h"
  28 #include "tree-pass.h"
  29 #include "ssa.h"
  30 #include "gimple-pretty-print.h"
  31 #include "fold-const.h"
  32 #include "stor-layout.h"
  33 #include "gimple-iterator.h"
  34 #include "tree-cfg.h"
  35 #include "tree-into-ssa.h"
  36 #include "builtins.h"
  37 #include "internal-fn.h"
  38 #include "tree-dfa.h"
  39 \f
  40
  41 /* This pass serves two closely-related purposes:
  42
  43    1. It conditionally executes calls that set errno if (a) the result of
  44       the call is unused and (b) a simple range check on the arguments can
  45       detect most cases where errno does not need to be set.
  46
  47       This is the "conditional dead-code elimination" that gave the pass
  48       its original name, since the call is dead for most argument values.
  49       The calls for which it helps are usually part of the C++ abstraction
  50       penalty exposed after inlining.
  51
  52    2. It looks for calls to built-in functions that set errno and whose
  53       result is used.  It checks whether there is an associated internal
  54       function that doesn't set errno and whether the target supports
  55       that internal function.  If so, the pass uses the internal function
  56       to compute the result of the built-in function but still arranges
  57       for errno to be set when necessary.  There are two ways of setting
  58       errno:
  59
  60       a. by protecting the original call with the same argument checks as (1)
  61
  62       b. by protecting the original call with a check that the result
  63          of the internal function is not equal to itself (i.e. is NaN).
  64
  65       (b) requires that NaNs are the only erroneous results.  It is not
  66       appropriate for functions like log, which returns ERANGE for zero
  67       arguments.  (b) is also likely to perform worse than (a) because it
  68       requires the result to be calculated first.  The pass therefore uses
  69       (a) when it can and uses (b) as a fallback.
  70
  71       For (b) the pass can replace the original call with a call to
  72       IFN_SET_EDOM, if the target supports direct assignments to errno.
  73
  74    In both cases, arguments that require errno to be set should occur
  75    rarely in practice.  Checks of the errno result should also be rare,
  76    but the compiler would need powerful interprocedural analysis to
  77    prove that errno is not checked.  It's much easier to add argument
  78    checks or result checks instead.
  79
  80      An example of (1) is:
  81
  82          log (x);   // Mostly dead call
  83      ==>
  84          if (__builtin_islessequal (x, 0))
  85              log (x);
  86
  87      With this change, call to log (x) is effectively eliminated, as
  88      in the majority of the cases, log won't be called with x out of
  89      range.  The branch is totally predictable, so the branch cost
  90      is low.
  91
  92      An example of (2) is:
  93
  94         y = sqrt (x);
  95      ==>
  96         y = IFN_SQRT (x);
  97         if (__builtin_isless (x, 0))
  98             sqrt (x);
  99
 100      In the vast majority of cases we should then never need to call sqrt.
 101
 102    Note that library functions are not supposed to clear errno to zero without
 103    error.  See IEEE Std 1003.1, section 2.3 Error Numbers, and section 7.5:3 of
 104    ISO/IEC 9899 (C99).
 105
 106    The condition wrapping the builtin call is conservatively set to avoid too
 107    aggressive (wrong) shrink wrapping.  */
 108
 109
 110 /* A structure for representing input domain of
 111    a function argument in integer.  If the lower
 112    bound is -inf, has_lb is set to false.  If the
 113    upper bound is +inf, has_ub is false.
 114    is_lb_inclusive and is_ub_inclusive are flags
 115    to indicate if lb and ub value are inclusive
 116    respectively.  */
 117
 118 struct inp_domain
 119 {
 120   int lb;
 121   int ub;
 122   bool has_lb;
 123   bool has_ub;
 124   bool is_lb_inclusive;
 125   bool is_ub_inclusive;
 126 };
 127
 128 /* A helper function to construct and return an input
 129    domain object.  LB is the lower bound, HAS_LB is
 130    a boolean flag indicating if the lower bound exists,
 131    and LB_INCLUSIVE is a boolean flag indicating if the
 132    lower bound is inclusive or not.  UB, HAS_UB, and
 133    UB_INCLUSIVE have the same meaning, but for upper
 134    bound of the domain.  */
 135
 136 static inp_domain
 137 get_domain (int lb, bool has_lb, bool lb_inclusive,
 138             int ub, bool has_ub, bool ub_inclusive)
 139 {
 140   inp_domain domain;
 141   domain.lb = lb;
 142   domain.has_lb = has_lb;
 143   domain.is_lb_inclusive = lb_inclusive;
 144   domain.ub = ub;
 145   domain.has_ub = has_ub;
 146   domain.is_ub_inclusive = ub_inclusive;
 147   return domain;
 148 }
 149
 150 /* A helper function to check the target format for the
 151    argument type. In this implementation, only IEEE formats
 152    are supported.  ARG is the call argument to be checked.
 153    Returns true if the format is supported.  To support other
 154    target formats,  function get_no_error_domain needs to be
 155    enhanced to have range bounds properly computed. Since
 156    the check is cheap (very small number of candidates
 157    to be checked), the result is not cached for each float type.  */
 158
 159 static bool
 160 check_target_format (tree arg)
 161 {
 162   tree type;
 163   machine_mode mode;
 164   const struct real_format *rfmt;
 165
 166   type = TREE_TYPE (arg);
 167   mode = TYPE_MODE (type);
 168   rfmt = REAL_MODE_FORMAT (mode);
 169   if ((mode == SFmode
 170        && (rfmt == &ieee_single_format || rfmt == &mips_single_format
 171            || rfmt == &motorola_single_format))
 172       || (mode == DFmode
 173           && (rfmt == &ieee_double_format || rfmt == &mips_double_format
 174               || rfmt == &motorola_double_format))
 175       /* For long double, we can not really check XFmode
 176          which is only defined on intel platforms.
 177          Candidate pre-selection using builtin function
 178          code guarantees that we are checking formats
 179          for long double modes: double, quad, and extended.  */
 180       || (mode != SFmode && mode != DFmode
 181           && (rfmt == &ieee_quad_format
 182               || rfmt == &mips_quad_format
 183               || rfmt == &ieee_extended_motorola_format
 184               || rfmt == &ieee_extended_intel_96_format
 185               || rfmt == &ieee_extended_intel_128_format
 186               || rfmt == &ieee_extended_intel_96_round_53_format)))
 187     return true;
 188
 189   return false;
 190 }
 191
 192 \f
 193 /* A helper function to help select calls to pow that are suitable for
 194    conditional DCE transformation.  It looks for pow calls that can be
 195    guided with simple conditions.  Such calls either have constant base
 196    values or base values converted from integers.  Returns true if
 197    the pow call POW_CALL is a candidate.  */
 198
 199 /* The maximum integer bit size for base argument of a pow call
 200    that is suitable for shrink-wrapping transformation.  */
 201 #define MAX_BASE_INT_BIT_SIZE 32
 202
 203 static bool
 204 check_pow (gcall *pow_call)
 205 {
 206   tree base, expn;
 207   enum tree_code bc, ec;
 208
 209   if (gimple_call_num_args (pow_call) != 2)
 210     return false;
 211
 212   base = gimple_call_arg (pow_call, 0);
 213   expn = gimple_call_arg (pow_call, 1);
 214
 215   if (!check_target_format (expn))
 216     return false;
 217
 218   bc = TREE_CODE (base);
 219   ec = TREE_CODE (expn);
 220
 221   /* Folding candidates are not interesting.
 222      Can actually assert that it is already folded.  */
 223   if (ec == REAL_CST && bc == REAL_CST)
 224     return false;
 225
 226   if (bc == REAL_CST)
 227     {
 228       /* Only handle a fixed range of constant.  */
 229       REAL_VALUE_TYPE mv;
 230       REAL_VALUE_TYPE bcv = TREE_REAL_CST (base);
 231       if (real_equal (&bcv, &dconst1))
 232         return false;
 233       if (real_less (&bcv, &dconst1))
 234         return false;
 235       real_from_integer (&mv, TYPE_MODE (TREE_TYPE (base)), 256, UNSIGNED);
 236       if (real_less (&mv, &bcv))
 237         return false;
 238       return true;
 239     }
 240   else if (bc == SSA_NAME)
 241     {
 242       tree base_val0, type;
 243       gimple *base_def;
 244       int bit_sz;
 245
 246       /* Only handles cases where base value is converted
 247          from integer values.  */
 248       base_def = SSA_NAME_DEF_STMT (base);
 249       if (gimple_code (base_def) != GIMPLE_ASSIGN)
 250         return false;
 251
 252       if (gimple_assign_rhs_code (base_def) != FLOAT_EXPR)
 253         return false;
 254       base_val0 = gimple_assign_rhs1 (base_def);
 255
 256       type = TREE_TYPE (base_val0);
 257       if (TREE_CODE (type) != INTEGER_TYPE)
 258         return false;
 259       bit_sz = TYPE_PRECISION (type);
 260       /* If the type of the base is too wide,
 261          the resulting shrink wrapping condition
 262          will be too conservative.  */
 263       if (bit_sz > MAX_BASE_INT_BIT_SIZE)
 264         return false;
 265
 266       return true;
 267     }
 268   else
 269     return false;
 270 }
 271
 272 /* A helper function to help select candidate function calls that are
 273    suitable for conditional DCE.  Candidate functions must have single
 274    valid input domain in this implementation except for pow (see check_pow).
 275    Returns true if the function call is a candidate.  */
 276
 277 static bool
 278 check_builtin_call (gcall *bcall)
 279 {
 280   tree arg;
 281
 282   arg = gimple_call_arg (bcall, 0);
 283   return check_target_format (arg);
 284 }
 285
 286 /* Return true if built-in function call CALL calls a math function
 287    and if we know how to test the range of its arguments to detect _most_
 288    situations in which errno is not set.  The test must err on the side
 289    of treating non-erroneous values as potentially erroneous.  */
 290
 291 static bool
 292 can_test_argument_range (gcall *call)
 293 {
 294   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
 295     {
 296     /* Trig functions.  */
 297     CASE_FLT_FN (BUILT_IN_ACOS):
 298     CASE_FLT_FN (BUILT_IN_ASIN):
 299     /* Hyperbolic functions.  */
 300     CASE_FLT_FN (BUILT_IN_ACOSH):
 301     CASE_FLT_FN (BUILT_IN_ATANH):
 302     CASE_FLT_FN (BUILT_IN_COSH):
 303     CASE_FLT_FN (BUILT_IN_SINH):
 304     /* Log functions.  */
 305     CASE_FLT_FN (BUILT_IN_LOG):
 306     CASE_FLT_FN (BUILT_IN_LOG2):
 307     CASE_FLT_FN (BUILT_IN_LOG10):
 308     CASE_FLT_FN (BUILT_IN_LOG1P):
 309     /* Exp functions.  */
 310     CASE_FLT_FN (BUILT_IN_EXP):
 311     CASE_FLT_FN (BUILT_IN_EXP2):
 312     CASE_FLT_FN (BUILT_IN_EXP10):
 313     CASE_FLT_FN (BUILT_IN_EXPM1):
 314     CASE_FLT_FN (BUILT_IN_POW10):
 315     /* Sqrt.  */
 316     CASE_FLT_FN (BUILT_IN_SQRT):
 317     CASE_FLT_FN_FLOATN_NX (BUILT_IN_SQRT):
 318       return check_builtin_call (call);
 319     /* Special one: two argument pow.  */
 320     case BUILT_IN_POW:
 321       return check_pow (call);
 322     default:
 323       break;
 324     }
 325
 326   return false;
 327 }
 328
 329 /* Return true if CALL can produce a domain error (EDOM) but can never
 330    produce a pole, range overflow or range underflow error (all ERANGE).
 331    This means that we can tell whether a function would have set errno
 332    by testing whether the result is a NaN.  */
 333
 334 static bool
 335 edom_only_function (gcall *call)
 336 {
 337   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
 338     {
 339     CASE_FLT_FN (BUILT_IN_ACOS):
 340     CASE_FLT_FN (BUILT_IN_ASIN):
 341     CASE_FLT_FN (BUILT_IN_ATAN):
 342     CASE_FLT_FN (BUILT_IN_COS):
 343     CASE_FLT_FN (BUILT_IN_SIGNIFICAND):
 344     CASE_FLT_FN (BUILT_IN_SIN):
 345     CASE_FLT_FN (BUILT_IN_SQRT):
 346     CASE_FLT_FN_FLOATN_NX (BUILT_IN_SQRT):
 347     CASE_FLT_FN (BUILT_IN_FMOD):
 348     CASE_FLT_FN (BUILT_IN_REMAINDER):
 349       return true;
 350
 351     default:
 352       return false;
 353     }
 354 }
 355
 356 /* Return true if it is structurally possible to guard CALL.  */
 357
 358 static bool
 359 can_guard_call_p (gimple *call)
 360 {
 361   return (!stmt_ends_bb_p (call)
 362           || find_fallthru_edge (gimple_bb (call)->succs));
 363 }
 364 \f
 365 /* A helper function to generate gimple statements for one bound
 366    comparison, so that the built-in function is called whenever
 367    TCODE <ARG, LBUB> is *false*.  TEMP_NAME1/TEMP_NAME2 are names
 368    of the temporaries, CONDS is a vector holding the produced GIMPLE
 369    statements, and NCONDS points to the variable holding the number of
 370    logical comparisons.  CONDS is either empty or a list ended with a
 371    null tree.  */
 372
 373 static void
 374 gen_one_condition (tree arg, int lbub,
 375                    enum tree_code tcode,
 376                    const char *temp_name1,
 377                    const char *temp_name2,
 378                    vec<gimple *> conds,
 379                    unsigned *nconds)
 380 {
 381   tree lbub_real_cst, lbub_cst, float_type;
 382   tree temp, tempn, tempc, tempcn;
 383   gassign *stmt1;
 384   gassign *stmt2;
 385   gcond *stmt3;
 386
 387   float_type = TREE_TYPE (arg);
 388   lbub_cst = build_int_cst (integer_type_node, lbub);
 389   lbub_real_cst = build_real_from_int_cst (float_type, lbub_cst);
 390
 391   temp = create_tmp_var (float_type, temp_name1);
 392   stmt1 = gimple_build_assign (temp, arg);
 393   tempn = make_ssa_name (temp, stmt1);
 394   gimple_assign_set_lhs (stmt1, tempn);
 395
 396   tempc = create_tmp_var (boolean_type_node, temp_name2);
 397   stmt2 = gimple_build_assign (tempc,
 398                                fold_build2 (tcode,
 399                                             boolean_type_node,
 400                                             tempn, lbub_real_cst));
 401   tempcn = make_ssa_name (tempc, stmt2);
 402   gimple_assign_set_lhs (stmt2, tempcn);
 403
 404   stmt3 = gimple_build_cond_from_tree (tempcn, NULL_TREE, NULL_TREE);
 405   conds.quick_push (stmt1);
 406   conds.quick_push (stmt2);
 407   conds.quick_push (stmt3);
 408   (*nconds)++;
 409 }
 410
 411 /* A helper function to generate GIMPLE statements for
 412    out of input domain check.  ARG is the call argument
 413    to be runtime checked, DOMAIN holds the valid domain
 414    for the given function, CONDS points to the vector
 415    holding the result GIMPLE statements.  *NCONDS is
 416    the number of logical comparisons.  This function
 417    produces no more than two logical comparisons, one
 418    for lower bound check, one for upper bound check.  */
 419
 420 static void
 421 gen_conditions_for_domain (tree arg, inp_domain domain,
 422                            vec<gimple *> conds,
 423                            unsigned *nconds)
 424 {
 425   if (domain.has_lb)
 426     gen_one_condition (arg, domain.lb,
 427                        (domain.is_lb_inclusive
 428                         ? UNGE_EXPR : UNGT_EXPR),
 429                        "DCE_COND_LB", "DCE_COND_LB_TEST",
 430                        conds, nconds);
 431
 432   if (domain.has_ub)
 433     {
 434       /* Now push a separator.  */
 435       if (domain.has_lb)
 436         conds.quick_push (NULL);
 437
 438       gen_one_condition (arg, domain.ub,
 439                          (domain.is_ub_inclusive
 440                           ? UNLE_EXPR : UNLT_EXPR),
 441                          "DCE_COND_UB", "DCE_COND_UB_TEST",
 442                          conds, nconds);
 443     }
 444 }
 445
 446
 447 /* A helper function to generate condition
 448    code for the y argument in call pow (some_const, y).
 449    See candidate selection in check_pow.  Since the
 450    candidates' base values have a limited range,
 451    the guarded code generated for y are simple:
 452    if (__builtin_isgreater (y, max_y))
 453      pow (const, y);
 454    Note max_y can be computed separately for each
 455    const base, but in this implementation, we
 456    choose to compute it using the max base
 457    in the allowed range for the purpose of
 458    simplicity.  BASE is the constant base value,
 459    EXPN is the expression for the exponent argument,
 460    *CONDS is the vector to hold resulting statements,
 461    and *NCONDS is the number of logical conditions.  */
 462
 463 static void
 464 gen_conditions_for_pow_cst_base (tree base, tree expn,
 465                                  vec<gimple *> conds,
 466                                  unsigned *nconds)
 467 {
 468   inp_domain exp_domain;
 469   /* Validate the range of the base constant to make
 470      sure it is consistent with check_pow.  */
 471   REAL_VALUE_TYPE mv;
 472   REAL_VALUE_TYPE bcv = TREE_REAL_CST (base);
 473   gcc_assert (!real_equal (&bcv, &dconst1)
 474               && !real_less (&bcv, &dconst1));
 475   real_from_integer (&mv, TYPE_MODE (TREE_TYPE (base)), 256, UNSIGNED);
 476   gcc_assert (!real_less (&mv, &bcv));
 477
 478   exp_domain = get_domain (0, false, false,
 479                            127, true, false);
 480
 481   gen_conditions_for_domain (expn, exp_domain,
 482                              conds, nconds);
 483 }
 484
 485 /* Generate error condition code for pow calls with
 486    non constant base values.  The candidates selected
 487    have their base argument value converted from
 488    integer (see check_pow) value (1, 2, 4 bytes), and
 489    the max exp value is computed based on the size
 490    of the integer type (i.e. max possible base value).
 491    The resulting input domain for exp argument is thus
 492    conservative (smaller than the max value allowed by
 493    the runtime value of the base).  BASE is the integer
 494    base value, EXPN is the expression for the exponent
 495    argument, *CONDS is the vector to hold resulting
 496    statements, and *NCONDS is the number of logical
 497    conditions.  */
 498
 499 static void
 500 gen_conditions_for_pow_int_base (tree base, tree expn,
 501                                  vec<gimple *> conds,
 502                                  unsigned *nconds)
 503 {
 504   gimple *base_def;
 505   tree base_val0;
 506   tree int_type;
 507   tree temp, tempn;
 508   tree cst0;
 509   gimple *stmt1, *stmt2;
 510   int bit_sz, max_exp;
 511   inp_domain exp_domain;
 512
 513   base_def = SSA_NAME_DEF_STMT (base);
 514   base_val0 = gimple_assign_rhs1 (base_def);
 515   int_type = TREE_TYPE (base_val0);
 516   bit_sz = TYPE_PRECISION (int_type);
 517   gcc_assert (bit_sz > 0
 518               && bit_sz <= MAX_BASE_INT_BIT_SIZE);
 519
 520   /* Determine the max exp argument value according to
 521      the size of the base integer.  The max exp value
 522      is conservatively estimated assuming IEEE754 double
 523      precision format.  */
 524   if (bit_sz == 8)
 525     max_exp = 128;
 526   else if (bit_sz == 16)
 527     max_exp = 64;
 528   else
 529     {
 530       gcc_assert (bit_sz == MAX_BASE_INT_BIT_SIZE);
 531       max_exp = 32;
 532     }
 533
 534   /* For pow ((double)x, y), generate the following conditions:
 535      cond 1:
 536      temp1 = x;
 537      if (__builtin_islessequal (temp1, 0))
 538
 539      cond 2:
 540      temp2 = y;
 541      if (__builtin_isgreater (temp2, max_exp_real_cst))  */
 542
 543   /* Generate condition in reverse order -- first
 544      the condition for the exp argument.  */
 545
 546   exp_domain = get_domain (0, false, false,
 547                            max_exp, true, true);
 548
 549   gen_conditions_for_domain (expn, exp_domain,
 550                              conds, nconds);
 551
 552   /* Now generate condition for the base argument.
 553      Note it does not use the helper function
 554      gen_conditions_for_domain because the base
 555      type is integer.  */
 556
 557   /* Push a separator.  */
 558   conds.quick_push (NULL);
 559
 560   temp = create_tmp_var (int_type, "DCE_COND1");
 561   cst0 = build_int_cst (int_type, 0);
 562   stmt1 = gimple_build_assign (temp, base_val0);
 563   tempn = make_ssa_name (temp, stmt1);
 564   gimple_assign_set_lhs (stmt1, tempn);
 565   stmt2 = gimple_build_cond (GT_EXPR, tempn, cst0, NULL_TREE, NULL_TREE);
 566
 567   conds.quick_push (stmt1);
 568   conds.quick_push (stmt2);
 569   (*nconds)++;
 570 }
 571
 572 /* Method to generate conditional statements for guarding conditionally
 573    dead calls to pow.  One or more statements can be generated for
 574    each logical condition.  Statement groups of different conditions
 575    are separated by a NULL tree and they are stored in the vec
 576    conds.  The number of logical conditions are stored in *nconds.
 577
 578    See C99 standard, 7.12.7.4:2, for description of pow (x, y).
 579    The precise condition for domain errors are complex.  In this
 580    implementation, a simplified (but conservative) valid domain
 581    for x and y are used: x is positive to avoid dom errors, while
 582    y is smaller than a upper bound (depending on x) to avoid range
 583    errors.  Runtime code is generated to check x (if not constant)
 584    and y against the valid domain.  If it is out, jump to the call,
 585    otherwise the call is bypassed.  POW_CALL is the call statement,
 586    *CONDS is a vector holding the resulting condition statements,
 587    and *NCONDS is the number of logical conditions.  */
 588
 589 static void
 590 gen_conditions_for_pow (gcall *pow_call, vec<gimple *> conds,
 591                         unsigned *nconds)
 592 {
 593   tree base, expn;
 594   enum tree_code bc;
 595
 596   gcc_checking_assert (check_pow (pow_call));
 597
 598   *nconds = 0;
 599
 600   base = gimple_call_arg (pow_call, 0);
 601   expn = gimple_call_arg (pow_call, 1);
 602
 603   bc = TREE_CODE (base);
 604
 605   if (bc == REAL_CST)
 606     gen_conditions_for_pow_cst_base (base, expn, conds, nconds);
 607   else if (bc == SSA_NAME)
 608     gen_conditions_for_pow_int_base (base, expn, conds, nconds);
 609   else
 610     gcc_unreachable ();
 611 }
 612
 613 /* A helper routine to help computing the valid input domain
 614    for a builtin function.  See C99 7.12.7 for details.  In this
 615    implementation, we only handle single region domain.  The
 616    resulting region can be conservative (smaller) than the actual
 617    one and rounded to integers.  Some of the bounds are documented
 618    in the standard, while other limit constants are computed
 619    assuming IEEE floating point format (for SF and DF modes).
 620    Since IEEE only sets minimum requirements for long double format,
 621    different long double formats exist under different implementations
 622    (e.g, 64 bit double precision (DF), 80 bit double-extended
 623    precision (XF), and 128 bit quad precision (QF) ).  For simplicity,
 624    in this implementation, the computed bounds for long double assume
 625    64 bit format (DF), and are therefore conservative.  Another
 626    assumption is that single precision float type is always SF mode,
 627    and double type is DF mode.  This function is quite
 628    implementation specific, so it may not be suitable to be part of
 629    builtins.c.  This needs to be revisited later to see if it can
 630    be leveraged in x87 assembly expansion.  */
 631
 632 static inp_domain
 633 get_no_error_domain (enum built_in_function fnc)
 634 {
 635   switch (fnc)
 636     {
 637     /* Trig functions: return [-1, +1]  */
 638     CASE_FLT_FN (BUILT_IN_ACOS):
 639     CASE_FLT_FN (BUILT_IN_ASIN):
 640       return get_domain (-1, true, true,
 641                          1, true, true);
 642     /* Hyperbolic functions.  */
 643     CASE_FLT_FN (BUILT_IN_ACOSH):
 644       /* acosh: [1, +inf)  */
 645       return get_domain (1, true, true,
 646                          1, false, false);
 647     CASE_FLT_FN (BUILT_IN_ATANH):
 648       /* atanh: (-1, +1)  */
 649       return get_domain (-1, true, false,
 650                          1, true, false);
 651     case BUILT_IN_COSHF:
 652     case BUILT_IN_SINHF:
 653       /* coshf: (-89, +89)  */
 654       return get_domain (-89, true, false,
 655                          89, true, false);
 656     case BUILT_IN_COSH:
 657     case BUILT_IN_SINH:
 658     case BUILT_IN_COSHL:
 659     case BUILT_IN_SINHL:
 660       /* cosh: (-710, +710)  */
 661       return get_domain (-710, true, false,
 662                          710, true, false);
 663     /* Log functions: (0, +inf)  */
 664     CASE_FLT_FN (BUILT_IN_LOG):
 665     CASE_FLT_FN (BUILT_IN_LOG2):
 666     CASE_FLT_FN (BUILT_IN_LOG10):
 667       return get_domain (0, true, false,
 668                          0, false, false);
 669     CASE_FLT_FN (BUILT_IN_LOG1P):
 670       return get_domain (-1, true, false,
 671                          0, false, false);
 672     /* Exp functions.  */
 673     case BUILT_IN_EXPF:
 674     case BUILT_IN_EXPM1F:
 675       /* expf: (-inf, 88)  */
 676       return get_domain (-1, false, false,
 677                          88, true, false);
 678     case BUILT_IN_EXP:
 679     case BUILT_IN_EXPM1:
 680     case BUILT_IN_EXPL:
 681     case BUILT_IN_EXPM1L:
 682       /* exp: (-inf, 709)  */
 683       return get_domain (-1, false, false,
 684                          709, true, false);
 685     case BUILT_IN_EXP2F:
 686       /* exp2f: (-inf, 128)  */
 687       return get_domain (-1, false, false,
 688                          128, true, false);
 689     case BUILT_IN_EXP2:
 690     case BUILT_IN_EXP2L:
 691       /* exp2: (-inf, 1024)  */
 692       return get_domain (-1, false, false,
 693                          1024, true, false);
 694     case BUILT_IN_EXP10F:
 695     case BUILT_IN_POW10F:
 696       /* exp10f: (-inf, 38)  */
 697       return get_domain (-1, false, false,
 698                          38, true, false);
 699     case BUILT_IN_EXP10:
 700     case BUILT_IN_POW10:
 701     case BUILT_IN_EXP10L:
 702     case BUILT_IN_POW10L:
 703       /* exp10: (-inf, 308)  */
 704       return get_domain (-1, false, false,
 705                          308, true, false);
 706     /* sqrt: [0, +inf)  */
 707     CASE_FLT_FN (BUILT_IN_SQRT):
 708     CASE_FLT_FN_FLOATN_NX (BUILT_IN_SQRT):
 709       return get_domain (0, true, true,
 710                          0, false, false);
 711     default:
 712       gcc_unreachable ();
 713     }
 714
 715   gcc_unreachable ();
 716 }
 717
 718 /* The function to generate shrink wrap conditions for a partially
 719    dead builtin call whose return value is not used anywhere,
 720    but has to be kept live due to potential error condition.
 721    BI_CALL is the builtin call, CONDS is the vector of statements
 722    for condition code, NCODES is the pointer to the number of
 723    logical conditions.  Statements belonging to different logical
 724    condition are separated by NULL tree in the vector.  */
 725
 726 static void
 727 gen_shrink_wrap_conditions (gcall *bi_call, vec<gimple *> conds,
 728                             unsigned int *nconds)
 729 {
 730   gcall *call;
 731   tree fn;
 732   enum built_in_function fnc;
 733
 734   gcc_assert (nconds && conds.exists ());
 735   gcc_assert (conds.length () == 0);
 736   gcc_assert (is_gimple_call (bi_call));
 737
 738   call = bi_call;
 739   fn = gimple_call_fndecl (call);
 740   gcc_assert (fn && DECL_BUILT_IN (fn));
 741   fnc = DECL_FUNCTION_CODE (fn);
 742   *nconds = 0;
 743
 744   if (fnc == BUILT_IN_POW)
 745     gen_conditions_for_pow (call, conds, nconds);
 746   else
 747     {
 748       tree arg;
 749       inp_domain domain = get_no_error_domain (fnc);
 750       *nconds = 0;
 751       arg = gimple_call_arg (bi_call, 0);
 752       gen_conditions_for_domain (arg, domain, conds, nconds);
 753     }
 754
 755   return;
 756 }
 757
 758 /* Shrink-wrap BI_CALL so that it is only called when one of the NCONDS
 759    conditions in CONDS is false.  */
 760
 761 static void
 762 shrink_wrap_one_built_in_call_with_conds (gcall *bi_call, vec <gimple *> conds,
 763                                           unsigned int nconds)
 764 {
 765   gimple_stmt_iterator bi_call_bsi;
 766   basic_block bi_call_bb, join_tgt_bb, guard_bb;
 767   edge join_tgt_in_edge_from_call, join_tgt_in_edge_fall_thru;
 768   edge bi_call_in_edge0, guard_bb_in_edge;
 769   unsigned tn_cond_stmts;
 770   unsigned ci;
 771   gimple *cond_expr = NULL;
 772   gimple *cond_expr_start;
 773
 774   /* The cfg we want to create looks like this:
 775
 776            [guard n-1]         <- guard_bb (old block)
 777              |    \
 778              | [guard n-2]                   }
 779              |    / \                        }
 780              |   /  ...                      } new blocks
 781              |  /  [guard 0]                 }
 782              | /    /   |                    }
 783             [ call ]    |     <- bi_call_bb  }
 784              | \        |
 785              |  \       |
 786              |   [ join ]     <- join_tgt_bb (old iff call must end bb)
 787              |
 788          possible EH edges (only if [join] is old)
 789
 790      When [join] is new, the immediate dominators for these blocks are:
 791
 792      1. [guard n-1]: unchanged
 793      2. [call]: [guard n-1]
 794      3. [guard m]: [guard m+1] for 0 <= m <= n-2
 795      4. [join]: [guard n-1]
 796
 797      We punt for the more complex case case of [join] being old and
 798      simply free the dominance info.  We also punt on postdominators,
 799      which aren't expected to be available at this point anyway.  */
 800   bi_call_bb = gimple_bb (bi_call);
 801
 802   /* Now find the join target bb -- split bi_call_bb if needed.  */
 803   if (stmt_ends_bb_p (bi_call))
 804     {
 805       /* We checked that there was a fallthrough edge in
 806          can_guard_call_p.  */
 807       join_tgt_in_edge_from_call = find_fallthru_edge (bi_call_bb->succs);
 808       gcc_assert (join_tgt_in_edge_from_call);
 809       /* We don't want to handle PHIs.  */
 810       if (EDGE_COUNT (join_tgt_in_edge_from_call->dest->preds) > 1)
 811         join_tgt_bb = split_edge (join_tgt_in_edge_from_call);
 812       else
 813         {
 814           join_tgt_bb = join_tgt_in_edge_from_call->dest;
 815           /* We may have degenerate PHIs in the destination.  Propagate
 816              those out.  */
 817           for (gphi_iterator i = gsi_start_phis (join_tgt_bb); !gsi_end_p (i);)
 818             {
 819               gphi *phi = i.phi ();
 820               replace_uses_by (gimple_phi_result (phi),
 821                                gimple_phi_arg_def (phi, 0));
 822               remove_phi_node (&i, true);
 823             }
 824         }
 825     }
 826   else
 827     {
 828       join_tgt_in_edge_from_call = split_block (bi_call_bb, bi_call);
 829       join_tgt_bb = join_tgt_in_edge_from_call->dest;
 830     }
 831
 832   bi_call_bsi = gsi_for_stmt (bi_call);
 833
 834   /* Now it is time to insert the first conditional expression
 835      into bi_call_bb and split this bb so that bi_call is
 836      shrink-wrapped.  */
 837   tn_cond_stmts = conds.length ();
 838   cond_expr = NULL;
 839   cond_expr_start = conds[0];
 840   for (ci = 0; ci < tn_cond_stmts; ci++)
 841     {
 842       gimple *c = conds[ci];
 843       gcc_assert (c || ci != 0);
 844       if (!c)
 845         break;
 846       gsi_insert_before (&bi_call_bsi, c, GSI_SAME_STMT);
 847       cond_expr = c;
 848     }
 849   ci++;
 850   gcc_assert (cond_expr && gimple_code (cond_expr) == GIMPLE_COND);
 851
 852   typedef std::pair<edge, edge> edge_pair;
 853   auto_vec<edge_pair, 8> edges;
 854
 855   bi_call_in_edge0 = split_block (bi_call_bb, cond_expr);
 856   bi_call_in_edge0->flags &= ~EDGE_FALLTHRU;
 857   bi_call_in_edge0->flags |= EDGE_FALSE_VALUE;
 858   guard_bb = bi_call_bb;
 859   bi_call_bb = bi_call_in_edge0->dest;
 860   join_tgt_in_edge_fall_thru = make_edge (guard_bb, join_tgt_bb,
 861                                           EDGE_TRUE_VALUE);
 862
 863   edges.reserve (nconds);
 864   edges.quick_push (edge_pair (bi_call_in_edge0, join_tgt_in_edge_fall_thru));
 865
 866   /* Code generation for the rest of the conditions  */
 867   for (unsigned int i = 1; i < nconds; ++i)
 868     {
 869       unsigned ci0;
 870       edge bi_call_in_edge;
 871       gimple_stmt_iterator guard_bsi = gsi_for_stmt (cond_expr_start);
 872       ci0 = ci;
 873       cond_expr_start = conds[ci0];
 874       for (; ci < tn_cond_stmts; ci++)
 875         {
 876           gimple *c = conds[ci];
 877           gcc_assert (c || ci != ci0);
 878           if (!c)
 879             break;
 880           gsi_insert_before (&guard_bsi, c, GSI_SAME_STMT);
 881           cond_expr = c;
 882         }
 883       ci++;
 884       gcc_assert (cond_expr && gimple_code (cond_expr) == GIMPLE_COND);
 885       guard_bb_in_edge = split_block (guard_bb, cond_expr);
 886       guard_bb_in_edge->flags &= ~EDGE_FALLTHRU;
 887       guard_bb_in_edge->flags |= EDGE_TRUE_VALUE;
 888
 889       bi_call_in_edge = make_edge (guard_bb, bi_call_bb, EDGE_FALSE_VALUE);
 890       edges.quick_push (edge_pair (bi_call_in_edge, guard_bb_in_edge));
 891     }
 892
 893   /* Now update the probability and profile information, processing the
 894      guards in order of execution.
 895
 896      There are two approaches we could take here.  On the one hand we
 897      could assign a probability of X to the call block and distribute
 898      that probability among its incoming edges.  On the other hand we
 899      could assign a probability of X to each individual call edge.
 900
 901      The choice only affects calls that have more than one condition.
 902      In those cases, the second approach would give the call block
 903      a greater probability than the first.  However, the difference
 904      is only small, and our chosen X is a pure guess anyway.
 905
 906      Here we take the second approach because it's slightly simpler
 907      and because it's easy to see that it doesn't lose profile counts.  */
 908   bi_call_bb->count = profile_count::zero ();
 909   while (!edges.is_empty ())
 910     {
 911       edge_pair e = edges.pop ();
 912       edge call_edge = e.first;
 913       edge nocall_edge = e.second;
 914       basic_block src_bb = call_edge->src;
 915       gcc_assert (src_bb == nocall_edge->src);
 916
 917       call_edge->probability = profile_probability::very_unlikely ();
 918       nocall_edge->probability = profile_probability::always ()
 919                                  - call_edge->probability;
 920
 921       bi_call_bb->count += call_edge->count ();
 922
 923       if (nocall_edge->dest != join_tgt_bb)
 924         nocall_edge->dest->count = src_bb->count - bi_call_bb->count;
 925     }
 926
 927   if (dom_info_available_p (CDI_DOMINATORS))
 928     {
 929       /* The split_blocks leave [guard 0] as the immediate dominator
 930          of [call] and [call] as the immediate dominator of [join].
 931          Fix them up.  */
 932       set_immediate_dominator (CDI_DOMINATORS, bi_call_bb, guard_bb);
 933       set_immediate_dominator (CDI_DOMINATORS, join_tgt_bb, guard_bb);
 934     }
 935
 936   if (dump_file && (dump_flags & TDF_DETAILS))
 937     {
 938       location_t loc;
 939       loc = gimple_location (bi_call);
 940       fprintf (dump_file,
 941                "%s:%d: note: function call is shrink-wrapped"
 942                " into error conditions.\n",
 943                LOCATION_FILE (loc), LOCATION_LINE (loc));
 944     }
 945 }
 946
 947 /* Shrink-wrap BI_CALL so that it is only called when it might set errno
 948    (but is always called if it would set errno).  */
 949
 950 static void
 951 shrink_wrap_one_built_in_call (gcall *bi_call)
 952 {
 953   unsigned nconds = 0;
 954   auto_vec<gimple *, 12> conds;
 955   gen_shrink_wrap_conditions (bi_call, conds, &nconds);
 956   gcc_assert (nconds != 0);
 957   shrink_wrap_one_built_in_call_with_conds (bi_call, conds, nconds);
 958 }
 959
 960 /* Return true if built-in function call CALL could be implemented using
 961    a combination of an internal function to compute the result and a
 962    separate call to set errno.  */
 963
 964 static bool
 965 can_use_internal_fn (gcall *call)
 966 {
 967   /* Only replace calls that set errno.  */
 968   if (!gimple_vdef (call))
 969     return false;
 970
 971   /* See whether there is an internal function for this built-in.  */
 972   if (replacement_internal_fn (call) == IFN_LAST)
 973     return false;
 974
 975   /* See whether we can catch all cases where errno would be set,
 976      while still avoiding the call in most cases.  */
 977   if (!can_test_argument_range (call)
 978       && !edom_only_function (call))
 979     return false;
 980
 981   return true;
 982 }
 983
 984 /* Implement built-in function call CALL using an internal function.  */
 985
 986 static void
 987 use_internal_fn (gcall *call)
 988 {
 989   /* We'll be inserting another call with the same arguments after the
 990      lhs has been set, so prevent any possible coalescing failure from
 991      having both values live at once.  See PR 71020.  */
 992   replace_abnormal_ssa_names (call);
 993
 994   unsigned nconds = 0;
 995   auto_vec<gimple *, 12> conds;
 996   if (can_test_argument_range (call))
 997     {
 998       gen_shrink_wrap_conditions (call, conds, &nconds);
 999       gcc_assert (nconds != 0);
1000     }
1001   else
1002     gcc_assert (edom_only_function (call));
1003
1004   internal_fn ifn = replacement_internal_fn (call);
1005   gcc_assert (ifn != IFN_LAST);
1006
1007   /* Construct the new call, with the same arguments as the original one.  */
1008   auto_vec <tree, 16> args;
1009   unsigned int nargs = gimple_call_num_args (call);
1010   for (unsigned int i = 0; i < nargs; ++i)
1011     args.safe_push (gimple_call_arg (call, i));
1012   gcall *new_call = gimple_build_call_internal_vec (ifn, args);
1013   gimple_set_location (new_call, gimple_location (call));
1014   gimple_call_set_nothrow (new_call, gimple_call_nothrow_p (call));
1015
1016   /* Transfer the LHS to the new call.  */
1017   tree lhs = gimple_call_lhs (call);
1018   gimple_call_set_lhs (new_call, lhs);
1019   gimple_call_set_lhs (call, NULL_TREE);
1020   SSA_NAME_DEF_STMT (lhs) = new_call;
1021
1022   /* Insert the new call.  */
1023   gimple_stmt_iterator gsi = gsi_for_stmt (call);
1024   gsi_insert_before (&gsi, new_call, GSI_SAME_STMT);
1025
1026   if (nconds == 0)
1027     {
1028       /* Skip the call if LHS == LHS.  If we reach here, EDOM is the only
1029          valid errno value and it is used iff the result is NaN.  */
1030       conds.quick_push (gimple_build_cond (EQ_EXPR, lhs, lhs,
1031                                            NULL_TREE, NULL_TREE));
1032       nconds++;
1033
1034       /* Try replacing the original call with a direct assignment to
1035          errno, via an internal function.  */
1036       if (set_edom_supported_p () && !stmt_ends_bb_p (call))
1037         {
1038           gimple_stmt_iterator gsi = gsi_for_stmt (call);
1039           gcall *new_call = gimple_build_call_internal (IFN_SET_EDOM, 0);
1040           gimple_set_vuse (new_call, gimple_vuse (call));
1041           gimple_set_vdef (new_call, gimple_vdef (call));
1042           SSA_NAME_DEF_STMT (gimple_vdef (new_call)) = new_call;
1043           gimple_set_location (new_call, gimple_location (call));
1044           gsi_replace (&gsi, new_call, false);
1045           call = new_call;
1046         }
1047     }
1048
1049   shrink_wrap_one_built_in_call_with_conds (call, conds, nconds);
1050 }
1051
1052 /* The top level function for conditional dead code shrink
1053    wrapping transformation.  */
1054
1055 static void
1056 shrink_wrap_conditional_dead_built_in_calls (vec<gcall *> calls)
1057 {
1058   unsigned i = 0;
1059
1060   unsigned n = calls.length ();
1061   for (; i < n ; i++)
1062     {
1063       gcall *bi_call = calls[i];
1064       if (gimple_call_lhs (bi_call))
1065         use_internal_fn (bi_call);
1066       else
1067         shrink_wrap_one_built_in_call (bi_call);
1068     }
1069 }
1070
1071 namespace {
1072
1073 const pass_data pass_data_call_cdce =
1074 {
1075   GIMPLE_PASS, /* type */
1076   "cdce", /* name */
1077   OPTGROUP_NONE, /* optinfo_flags */
1078   TV_TREE_CALL_CDCE, /* tv_id */
1079   ( PROP_cfg | PROP_ssa ), /* properties_required */
1080   0, /* properties_provided */
1081   0, /* properties_destroyed */
1082   0, /* todo_flags_start */
1083   0, /* todo_flags_finish */
1084 };
1085
1086 class pass_call_cdce : public gimple_opt_pass
1087 {
1088 public:
1089   pass_call_cdce (gcc::context *ctxt)
1090     : gimple_opt_pass (pass_data_call_cdce, ctxt)
1091   {}
1092
1093   /* opt_pass methods: */
1094   virtual bool gate (function *)
1095     {
1096       /* The limit constants used in the implementation
1097          assume IEEE floating point format.  Other formats
1098          can be supported in the future if needed.  */
1099       return flag_tree_builtin_call_dce != 0;
1100     }
1101
1102   virtual unsigned int execute (function *);
1103
1104 }; // class pass_call_cdce
1105
1106 unsigned int
1107 pass_call_cdce::execute (function *fun)
1108 {
1109   basic_block bb;
1110   gimple_stmt_iterator i;
1111   auto_vec<gcall *> cond_dead_built_in_calls;
1112   FOR_EACH_BB_FN (bb, fun)
1113     {
1114       /* Skip blocks that are being optimized for size, since our
1115          transformation always increases code size.  */
1116       if (optimize_bb_for_size_p (bb))
1117         continue;
1118
1119       /* Collect dead call candidates.  */
1120       for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
1121         {
1122           gcall *stmt = dyn_cast <gcall *> (gsi_stmt (i));
1123           if (stmt
1124               && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
1125               && (gimple_call_lhs (stmt)
1126                   ? can_use_internal_fn (stmt)
1127                   : can_test_argument_range (stmt))
1128               && can_guard_call_p (stmt))
1129             {
1130               if (dump_file && (dump_flags & TDF_DETAILS))
1131                 {
1132                   fprintf (dump_file, "Found conditional dead call: ");
1133                   print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1134                   fprintf (dump_file, "\n");
1135                 }
1136               if (!cond_dead_built_in_calls.exists ())
1137                 cond_dead_built_in_calls.create (64);
1138               cond_dead_built_in_calls.safe_push (stmt);
1139             }
1140         }
1141     }
1142
1143   if (!cond_dead_built_in_calls.exists ())
1144     return 0;
1145
1146   shrink_wrap_conditional_dead_built_in_calls (cond_dead_built_in_calls);
1147   free_dominance_info (CDI_POST_DOMINATORS);
1148   /* As we introduced new control-flow we need to insert PHI-nodes
1149      for the call-clobbers of the remaining call.  */
1150   mark_virtual_operands_for_renaming (fun);
1151   return TODO_update_ssa;
1152 }
1153
1154 } // anon namespace
1155
1156 gimple_opt_pass *
1157 make_pass_call_cdce (gcc::context *ctxt)
1158 {
1159   return new pass_call_cdce (ctxt);
1160 }