gcc/config/aarch64/aarch64-speculation.cc

/* Speculation tracking and mitigation (e.g. CVE 2017-5753) for AArch64.
   Copyright (C) 2018-2024 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "rtl.h"
#include "tree-pass.h"
#include "profile-count.h"
#include "backend.h"
#include "cfgbuild.h"
#include "print-rtl.h"
#include "cfgrtl.h"
#include "function.h"
#include "basic-block.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "insn-attr.h"
#include "df.h"
#include "tm_p.h"
#include "insn-config.h"
#include "recog.h"

/* This pass scans the RTL insns late in the RTL pipeline.  The aim is
   to identify all places where there is conditional control flow and
   to insert code that tracks any speculative execution of a conditional
   branch.

   To do this we reserve a call-clobbered register (so that it can be
   initialized very early in the function prologue) that can then be
   updated each time there is a conditional branch.  At each such
   branch we then generate a code sequence that uses conditional
   select operations that are not subject to speculation themselves
   (we ignore for the moment situations where that might not always be
   strictly true).  For example, a branch sequence such as:

	B.EQ	<dst>
	...
   <dst>:

   is transformed to:

	B.EQ	<dst>
	CSEL	tracker, tracker, XZr, ne
	...
   <dst>:
	CSEL	tracker, tracker, XZr, eq

   Since we start with the tracker initialized to all bits one, if at any
   time the predicted control flow diverges from the architectural program
   behavior, then the tracker will become zero (but not otherwise).

   The tracker value can be used at any time at which a value needs
   guarding against incorrect speculation.  This can be done in
   several ways, but they all amount to the same thing.  For an
   untrusted address, or an untrusted offset to a trusted address, we
   can simply mask the untrusted value with the tracker.  If the CPU
   is not speculating, or speculating correctly, then the value will
   remain unchanged, otherwise it will be clamped to zero.  For more
   complex scenarios we can compare the tracker against zero and use
   the flags to form a new selection with an alternate safe value.

   On implementations where the data processing instructions may
   themselves produce speculative values, the architecture requires
   that a CSDB instruction will resolve such data speculation, so each
   time we use the tracker for protecting a vulnerable value we also
   emit a CSDB: we do not need to do that each time the tracker itself
   is updated.
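
   A sketch of such a guarded use (the register choices here are
   purely illustrative): clamping an untrusted index in x1 before it
   is used to address memory via x2 might look like:

	AND	x1, x1, tracker
	CSDB
	LDR	x0, [x2, x1]

   The AND clamps x1 to zero under incorrect speculation, and the
   CSDB ensures that it is the clamped value that the load consumes.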

   At function boundaries, we need to communicate the speculation
   tracking state with the caller or the callee.  This is tricky
   because there is no register available for such a purpose without
   creating a new ABI.  We deal with this by relying on the principle
   that in all real programs the stack pointer, SP, will never be NULL
   at a function boundary; we can thus encode the speculation state in
   SP by clearing SP if the speculation tracker itself is NULL.  After
   the call we recover the tracking state back from SP into the
   tracker register.  The result is that a function call sequence is
   transformed to:

	MOV	tmp, SP
	AND	tmp, tmp, tracker
	MOV	SP, tmp
	BL	<callee>
	CMP	SP, #0
	CSETM	tracker, ne

   The additional MOV instructions in the pre-call sequence are needed
   because SP cannot be used directly with the AND instruction.

   The code inside a function body uses the post-call sequence in the
   prologue to establish the tracker and the pre-call sequence in the
   epilogue to re-encode the state for the return.
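
   Putting those together, the frame of a tracked function looks
   roughly like this (a sketch; the normal frame setup and teardown
   are omitted):

   <entry>:
	CMP	SP, #0			// recover the state from SP
	CSETM	tracker, ne
	...
	MOV	tmp, SP			// re-encode the state in SP
	AND	tmp, tmp, tracker
	MOV	SP, tmp
	RET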

   The code sequences have the nice property that if called from, or
   calling, a function that does not track speculation then the stack
   pointer will always be non-NULL and hence the tracker will be
   initialized to all bits one as we need: we lose the ability to fully
   track speculation in that case, but we are still architecturally
   safe.

   Tracking speculation in this way is quite expensive, both in code
   size and execution time.  We employ a number of tricks to try to
   limit this:

   1) Simple leaf functions with no conditional branches (or use of
   the tracker) do not need to establish a new tracker: they simply
   carry the tracking state through SP for the duration of the call.
   The same is also true for leaf functions that end in a tail-call.

   2) Back-to-back function calls in a single basic block also do not
   need to re-establish the tracker between the calls.  Again, we can
   carry the tracking state in SP for this period of time unless the
   tracker value is needed at that point in time; see the sketch
   below.
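
   As an illustration of (2), two adjacent calls need only a single
   pre-call sequence before the first and a single recovery sequence
   after the second (again a sketch):

	MOV	tmp, SP
	AND	tmp, tmp, tracker
	MOV	SP, tmp
	BL	<callee1>
	BL	<callee2>
	CMP	SP, #0
	CSETM	tracker, ne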

   We run the pass while the CFG is still present so that we can handle
   most of the conditional branch cases using the standard edge insertion
   code.  Where possible, we prefer to run the pass just before the final
   branch reorganization pass.  That pass will then hopefully clean things
   up afterwards so that the results aren't too horrible.

   However, we must run the pass after all conditional branches have
   been inserted.  switch_pstate_sm inserts conditional branches for
   streaming-compatible code, and so for streaming-compatible functions,
   this pass must run after that one.

   We handle this by having two copies of the pass: the normal one that
   runs before branch reorganization, and a "late" one that runs just
   before late_thread_prologue_and_epilogue.  The two passes have
   mutually exclusive gates, with the normal pass being chosen wherever
   possible.  */

/* Generate a code sequence to clobber SP if speculating incorrectly.
   This is the pre-call sequence described above:
   MOV tmp, SP; AND tmp, tmp, tracker; MOV SP, tmp.  */
static rtx_insn *
aarch64_speculation_clobber_sp ()
{
  rtx sp = gen_rtx_REG (DImode, SP_REGNUM);
  rtx tracker = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM);
  rtx scratch = gen_rtx_REG (DImode, SPECULATION_SCRATCH_REGNUM);

  start_sequence ();
  emit_insn (gen_rtx_SET (scratch, sp));
  emit_insn (gen_anddi3 (scratch, scratch, tracker));
  emit_insn (gen_rtx_SET (sp, scratch));
  rtx_insn *seq = get_insns ();
  end_sequence ();
  return seq;
}

/* Generate a code sequence to establish the tracker variable from the
   contents of SP; this is the post-call sequence described above:
   CMP SP, #0; CSETM tracker, ne.  */
static rtx_insn *
aarch64_speculation_establish_tracker ()
{
  rtx sp = gen_rtx_REG (DImode, SP_REGNUM);
  rtx tracker = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM);
  start_sequence ();
  rtx cc = aarch64_gen_compare_reg (EQ, sp, const0_rtx);
  emit_insn (gen_cstoredi_neg (tracker,
			       gen_rtx_NE (CCmode, cc, const0_rtx), cc));
  rtx_insn *seq = get_insns ();
  end_sequence ();
  return seq;
}

/* Main speculation tracking pass.  */
unsigned int
aarch64_do_track_speculation ()
{
  basic_block bb;
  bool needs_tracking = false;
  bool need_second_pass = false;
  rtx_insn *insn;
  int fixups_pending = 0;

  FOR_EACH_BB_FN (bb, cfun)
    {
      insn = BB_END (bb);

      if (dump_file)
	fprintf (dump_file, "Basic block %d:\n", bb->index);

      while (insn != BB_HEAD (bb)
	     && NOTE_P (insn))
	insn = PREV_INSN (insn);

      if (control_flow_insn_p (insn))
	{
	  if (any_condjump_p (insn))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "  condjump\n");
		  dump_insn_slim (dump_file, insn);
		}

	      rtx src = SET_SRC (pc_set (insn));

	      /* Check for an inverted jump, where the fall-through edge
		 appears first.  */
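	      /* In RTL terms, a sketch of what we test for here: the
		 pc_set source of a normal conditional jump has the form
		   (if_then_else COND (label_ref L) (pc))
		 while the inverted form is
		   (if_then_else COND (pc) (label_ref L)).  */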
	      bool inverted = GET_CODE (XEXP (src, 2)) != PC;
	      /* The other edge must be the PC (we assume that we don't
		 have conditional return instructions).  */
	      gcc_assert (GET_CODE (XEXP (src, 1 + !inverted)) == PC);

	      rtx cond = copy_rtx (XEXP (src, 0));
	      gcc_assert (COMPARISON_P (cond)
			  && REG_P (XEXP (cond, 0))
			  && REGNO (XEXP (cond, 0)) == CC_REGNUM
			  && XEXP (cond, 1) == const0_rtx);
	      rtx branch_tracker = gen_speculation_tracker (copy_rtx (cond));
	      rtx fallthru_tracker = gen_speculation_tracker_rev (cond);
	      if (inverted)
		std::swap (branch_tracker, fallthru_tracker);

	      insert_insn_on_edge (branch_tracker, BRANCH_EDGE (bb));
	      insert_insn_on_edge (fallthru_tracker, FALLTHRU_EDGE (bb));
	      needs_tracking = true;
	    }
	  else if (GET_CODE (PATTERN (insn)) == RETURN)
	    {
	      /* If we already know we'll need a second pass, don't put
		 out the return sequence now, or we might end up with
		 two copies.  Instead, we'll do all return statements
		 during the second pass.  However, if this is the
		 first return insn we've found and we already
		 know that we'll need to emit the code, we can save a
		 second pass by emitting the code now.  */
	      if (needs_tracking && ! need_second_pass)
		{
		  rtx_insn *seq = aarch64_speculation_clobber_sp ();
		  emit_insn_before (seq, insn);
		}
	      else
		{
		  fixups_pending++;
		  need_second_pass = true;
		}
	    }
	  else if (find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
	    {
	      rtx_insn *seq = aarch64_speculation_clobber_sp ();
	      emit_insn_before (seq, insn);
	      needs_tracking = true;
	    }
	}
      else
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, "  other\n");
	      dump_insn_slim (dump_file, insn);
	    }
	}
    }

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *end = BB_END (bb);
      rtx_insn *call_insn = NULL;

      if (bb->flags & BB_NON_LOCAL_GOTO_TARGET)
	{
	  rtx_insn *label = NULL;
	  /* For non-local goto targets we have to recover the
	     speculation state from SP.  Find the last code label at
	     the head of the block and place the fixup sequence after
	     that.  */
	  for (insn = BB_HEAD (bb); insn != end; insn = NEXT_INSN (insn))
	    {
	      if (LABEL_P (insn))
		label = insn;
	      /* Never put anything before the basic block note.  */
	      if (NOTE_INSN_BASIC_BLOCK_P (insn))
		label = insn;
	      if (INSN_P (insn))
		break;
	    }

	  gcc_assert (label);
	  emit_insn_after (aarch64_speculation_establish_tracker (), label);
	}

      /* Scan the insns looking for calls.  We need to pass the
	 speculation tracking state encoded into SP.  After a call we
	 restore the speculation tracking into the tracker register.
	 To avoid unnecessary transfers we look for two or more calls
	 within a single basic block and eliminate, where possible,
	 any redundant operations.  */
      for (insn = BB_HEAD (bb); ; insn = NEXT_INSN (insn))
	{
	  if (NONDEBUG_INSN_P (insn)
	      && recog_memoized (insn) >= 0
	      && (get_attr_speculation_barrier (insn)
		  == SPECULATION_BARRIER_TRUE))
	    {
	      if (call_insn)
		{
		  /* This instruction requires the speculation
		     tracking to be in the tracker register.  If there
		     was an earlier call in this block, we need to
		     copy the speculation tracking back there.  */
		  emit_insn_after (aarch64_speculation_establish_tracker (),
				   call_insn);
		  call_insn = NULL;
		}

	      needs_tracking = true;
	    }

	  if (CALL_P (insn)
	      || (NONDEBUG_INSN_P (insn)
		  && recog_memoized (insn) >= 0
		  && get_attr_is_call (insn) == IS_CALL_YES))
	    {
	      bool tailcall
		= (CALL_P (insn)
		   && (SIBLING_CALL_P (insn)
		       || find_reg_note (insn, REG_NORETURN, NULL_RTX)));

	      /* Tailcalls are like returns: we can eliminate the
		 transfer between the tracker register and SP if we
		 know that this function does not itself need
		 tracking.  */
	      if (tailcall && (need_second_pass || !needs_tracking))
		{
		  /* Don't clear call_insn if it is set - needs_tracking
		     will be true in that case and so we will end
		     up putting out mitigation sequences.  */
		  fixups_pending++;
		  need_second_pass = true;
		  break;
		}

	      needs_tracking = true;

	      /* We always need a transfer before the first call in a BB.  */
	      if (!call_insn)
		emit_insn_before (aarch64_speculation_clobber_sp (), insn);

	      /* Tail-calls and no-return calls don't need any post-call
		 reestablishment of the tracker.  */
	      if (! tailcall)
		call_insn = insn;
	      else
		call_insn = NULL;
	    }

	  if (insn == end)
	    break;
	}

      if (call_insn)
	{
	  rtx_insn *seq = aarch64_speculation_establish_tracker ();

	  /* Handle debug insns at the end of the BB.  Put the extra
	     insns after them.  This ensures that we have consistent
	     behaviour for the placement of the extra insns between
	     debug and non-debug builds.  */
	  for (insn = call_insn;
	       insn != end && DEBUG_INSN_P (NEXT_INSN (insn));
	       insn = NEXT_INSN (insn))
	    ;

	  if (insn == end)
	    {
	      edge e = find_fallthru_edge (bb->succs);
	      /* We need to be very careful about some calls that
		 appear at the end of a basic block.  If the call
		 involves exceptions, then the compiler may depend on
		 this being the last instruction in the block.  The
		 easiest way to handle this is to commit the new
		 instructions on the fall-through edge and to let
		 commit_edge_insertions clean things up for us.

		 Sometimes, e.g. with OMP, there may not even be an
		 outgoing edge after the call.  In that case, there's
		 not much we can do; presumably the compiler has
		 decided that the call can never return in this
		 context.  */
	      if (e)
		{
		  /* We need to set the location lists explicitly in
		     this case.  */
		  if (! INSN_P (seq))
		    {
		      start_sequence ();
		      emit_insn (seq);
		      seq = get_insns ();
		      end_sequence ();
		    }

		  for (rtx_insn *list = seq; list; list = NEXT_INSN (list))
		    INSN_LOCATION (list) = INSN_LOCATION (call_insn);

		  insert_insn_on_edge (seq, e);
		}
	    }
	  else
	    emit_insn_after (seq, call_insn);
	}
    }

  if (needs_tracking)
    {
      if (need_second_pass)
	{
	  /* We found a return instruction before we found out whether
	     or not we need to emit the tracking code, but we now
	     know we do.  Run quickly over the basic blocks and
	     fix up the return insns.  */
	  FOR_EACH_BB_FN (bb, cfun)
	    {
	      insn = BB_END (bb);

	      while (insn != BB_HEAD (bb)
		     && NOTE_P (insn))
		insn = PREV_INSN (insn);

	      if ((control_flow_insn_p (insn)
		   && GET_CODE (PATTERN (insn)) == RETURN)
		  || (CALL_P (insn)
		      && (SIBLING_CALL_P (insn)
			  || find_reg_note (insn, REG_NORETURN, NULL_RTX))))
		{
		  rtx_insn *seq = aarch64_speculation_clobber_sp ();
		  emit_insn_before (seq, insn);
		  fixups_pending--;
		}
	    }

	  gcc_assert (fixups_pending == 0);
	}

      /* Set up the initial value of the tracker, using the incoming SP.  */
      insert_insn_on_edge (aarch64_speculation_establish_tracker (),
			   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      commit_edge_insertions ();
    }

  return 0;
}

namespace {

const pass_data pass_data_aarch64_track_speculation =
{
  RTL_PASS,		/* type.  */
  "speculation",	/* name.  */
  OPTGROUP_NONE,	/* optinfo_flags.  */
  TV_MACH_DEP,		/* tv_id.  */
  0,			/* properties_required.  */
  0,			/* properties_provided.  */
  0,			/* properties_destroyed.  */
  0,			/* todo_flags_start.  */
  0			/* todo_flags_finish.  */
};

class pass_track_speculation : public rtl_opt_pass
{
 public:
  pass_track_speculation(gcc::context *ctxt, bool is_late)
    : rtl_opt_pass(pass_data_aarch64_track_speculation, ctxt),
      is_late (is_late)
  {}

  /* opt_pass methods:  */
  virtual bool gate (function *)
    {
      return (aarch64_track_speculation
	      && (is_late == bool (TARGET_STREAMING_COMPATIBLE)));
    }

  virtual unsigned int execute (function *)
    {
      return aarch64_do_track_speculation ();
    }

private:
  /* Whether this is the late pass that runs before late prologue/epilogue
     insertion, or the normal pass that runs before branch reorganization.  */
  bool is_late;
}; // class pass_track_speculation.
} // anon namespace.

/* Create a new pass instance.  */
rtl_opt_pass *
make_pass_track_speculation (gcc::context *ctxt)
{
  return new pass_track_speculation (ctxt, /*is_late=*/false);
}

rtl_opt_pass *
make_pass_late_track_speculation (gcc::context *ctxt)
{
  return new pass_track_speculation (ctxt, /*is_late=*/true);
}