gas/config/tc-i386.c

   1 /* tc-i386.c -- Assemble code for the Intel 80386
   2    Copyright (C) 1989-2024 Free Software Foundation, Inc.
   3
   4    This file is part of GAS, the GNU Assembler.
   5
   6    GAS is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GAS is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GAS; see the file COPYING.  If not, write to the Free
  18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
  19    02110-1301, USA.  */
  20
  21 /* Intel 80386 machine specific gas.
  22    Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
  23    x86_64 support by Jan Hubicka (jh@suse.cz)
  24    VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
  25    Bugs & suggestions are completely welcome.  This is free software.
  26    Please help us make it better.  */
  27
  28 #include "as.h"
  29 #include "safe-ctype.h"
  30 #include "subsegs.h"
  31 #include "dwarf2dbg.h"
  32 #include "dw2gencfi.h"
  33 #include "scfi.h"
  34 #include "gen-sframe.h"
  35 #include "sframe.h"
  36 #include "elf/x86-64.h"
  37 #include "opcodes/i386-init.h"
  38 #include "opcodes/i386-mnem.h"
  39 #include <limits.h>
  40
  41 #ifndef INFER_ADDR_PREFIX
  42 #define INFER_ADDR_PREFIX 1
  43 #endif
  44
  45 #ifndef DEFAULT_ARCH
  46 #define DEFAULT_ARCH "i386"
  47 #endif
  48
  49 #ifndef INLINE
  50 #if __GNUC__ >= 2
  51 #define INLINE __inline__
  52 #else
  53 #define INLINE
  54 #endif
  55 #endif
  56
  57 /* Prefixes will be emitted in the order defined below.
  58    WAIT_PREFIX must be the first prefix since FWAIT really is an
  59    instruction, and so must come before any prefixes.
  60    The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
  61    REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
  62 #define WAIT_PREFIX     0
  63 #define SEG_PREFIX      1
  64 #define ADDR_PREFIX     2
  65 #define DATA_PREFIX     3
  66 #define REP_PREFIX      4
  67 #define HLE_PREFIX      REP_PREFIX
  68 #define BND_PREFIX      REP_PREFIX
  69 #define LOCK_PREFIX     5
  70 #define REX_PREFIX      6       /* must come last.  */
  71 #define MAX_PREFIXES    7       /* max prefixes per opcode */
  72
  73 /* we define the syntax here (modulo base,index,scale syntax) */
  74 #define REGISTER_PREFIX '%'
  75 #define IMMEDIATE_PREFIX '$'
  76 #define ABSOLUTE_PREFIX '*'
  77
  78 /* these are the instruction mnemonic suffixes in AT&T syntax or
  79    memory operand size in Intel syntax.  */
  80 #define WORD_MNEM_SUFFIX  'w'
  81 #define BYTE_MNEM_SUFFIX  'b'
  82 #define SHORT_MNEM_SUFFIX 's'
  83 #define LONG_MNEM_SUFFIX  'l'
  84 #define QWORD_MNEM_SUFFIX  'q'
  85
  86 #define END_OF_INSN '\0'
  87
  88 #define OPERAND_TYPE_NONE { .bitfield = { .class = ClassNone } }
  89
  90 /* This matches the C -> StaticRounding alias in the opcode table.  */
  91 #define commutative staticrounding
  92
  93 /*
  94   'templates' is for grouping together 'template' structures for opcodes
  95   of the same name.  This is only used for storing the insns in the grand
  96   ole hash table of insns.
  97   The templates themselves start at START and range up to (but not including)
  98   END.
  99   */
 100 typedef struct
 101 {
 102   const insn_template *start;
 103   const insn_template *end;
 104 }
 105 templates;
 106
 107 /* 386 operand encoding bytes:  see 386 book for details of this.  */
 108 typedef struct
 109 {
 110   unsigned int regmem;  /* codes register or memory operand */
 111   unsigned int reg;     /* codes register operand (or extended opcode) */
 112   unsigned int mode;    /* how to interpret regmem & reg */
 113 }
 114 modrm_byte;
 115
 116 /* x86-64 extension prefix.  */
 117 typedef int rex_byte;
 118
 119 /* 386 SIB (scale-index-base) byte, used to code indirect addressing.  */
 120 typedef struct
 121 {
 122   unsigned base;        /* base field of the SIB byte */
 123   unsigned index;       /* index field of the SIB byte */
 124   unsigned scale;       /* scale field of the SIB byte */
 125 }
 126 sib_byte;
 127
 128 /* x86 arch names, types and features */
 129 typedef struct
 130 {
 131   const char *name;             /* arch name */
 132   unsigned int len:8;           /* arch string length */
 133   bool skip:1;                  /* show_arch should skip this. */
 134   enum processor_type type;     /* arch type */
 135   enum { vsz_none, vsz_set, vsz_reset } vsz; /* vector size control */
 136   i386_cpu_flags enable;                /* cpu feature enable flags */
 137   i386_cpu_flags disable;       /* cpu feature disable flags */
 138 }
 139 arch_entry;
 140
 141 static void update_code_flag (int, int);
 142 static void s_insn (int);
 143 static void s_noopt (int);
 144 static void set_code_flag (int);
 145 static void set_16bit_gcc_code_flag (int);
 146 static void set_intel_syntax (int);
 147 static void set_intel_mnemonic (int);
 148 static void set_allow_index_reg (int);
 149 static void set_check (int);
 150 static void set_cpu_arch (int);
 151 #ifdef TE_PE
 152 static void pe_directive_secrel (int);
 153 static void pe_directive_secidx (int);
 154 #endif
 155 static void signed_cons (int);
 156 static char *output_invalid (int c);
 157 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
 158                                     const char *);
 159 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
 160                                        const char *);
 161 static int i386_att_operand (char *);
 162 static int i386_intel_operand (char *, int);
 163 static int i386_intel_simplify (expressionS *);
 164 static int i386_intel_parse_name (const char *, expressionS *);
 165 static const reg_entry *parse_register (const char *, char **);
 166 static const char *parse_insn (const char *, char *, bool);
 167 static char *parse_operands (char *, const char *);
 168 static void swap_operands (void);
 169 static void swap_2_operands (unsigned int, unsigned int);
 170 static enum i386_flag_code i386_addressing_mode (void);
 171 static void optimize_imm (void);
 172 static bool optimize_disp (const insn_template *t);
 173 static const insn_template *match_template (char);
 174 static int check_string (void);
 175 static int process_suffix (void);
 176 static int check_byte_reg (void);
 177 static int check_long_reg (void);
 178 static int check_qword_reg (void);
 179 static int check_word_reg (void);
 180 static int finalize_imm (void);
 181 static int process_operands (void);
 182 static const reg_entry *build_modrm_byte (void);
 183 static void output_insn (const struct last_insn *);
 184 static void output_imm (fragS *, offsetT);
 185 static void output_disp (fragS *, offsetT);
 186 #ifdef OBJ_AOUT
 187 static void s_bss (int);
 188 #endif
 189 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 190 static void handle_large_common (int small ATTRIBUTE_UNUSED);
 191
 192 /* GNU_PROPERTY_X86_ISA_1_USED.  */
 193 static unsigned int x86_isa_1_used;
 194 /* GNU_PROPERTY_X86_FEATURE_2_USED.  */
 195 static unsigned int x86_feature_2_used;
 196 /* Generate x86 used ISA and feature properties.  */
 197 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
 198 #endif
 199
 200 static const char *default_arch = DEFAULT_ARCH;
 201
 202 /* parse_register() returns this when a register alias cannot be used.  */
 203 static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
 204                                    { Dw2Inval, Dw2Inval } };
 205
 206 static const reg_entry *reg_eax;
 207 static const reg_entry *reg_ds;
 208 static const reg_entry *reg_es;
 209 static const reg_entry *reg_ss;
 210 static const reg_entry *reg_st0;
 211 static const reg_entry *reg_k0;
 212
 213 /* VEX prefix.  */
 214 typedef struct
 215 {
 216   /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
 217   unsigned char bytes[4];
 218   unsigned int length;
 219   /* Destination or source register specifier.  */
 220   const reg_entry *register_specifier;
 221 } vex_prefix;
 222
 223 /* 'md_assemble ()' gathers together information and puts it into a
 224    i386_insn.  */
 225
 226 union i386_op
 227   {
 228     expressionS *disps;
 229     expressionS *imms;
 230     const reg_entry *regs;
 231   };
 232
 233 enum i386_error
 234   {
 235     no_error, /* Must be first.  */
 236     operand_size_mismatch,
 237     operand_type_mismatch,
 238     register_type_mismatch,
 239     number_of_operands_mismatch,
 240     invalid_instruction_suffix,
 241     bad_imm4,
 242     unsupported_with_intel_mnemonic,
 243     unsupported_syntax,
 244     unsupported_EGPR_for_addressing,
 245     unsupported_nf,
 246     unsupported,
 247     unsupported_on_arch,
 248     unsupported_64bit,
 249     no_vex_encoding,
 250     no_evex_encoding,
 251     invalid_sib_address,
 252     invalid_vsib_address,
 253     invalid_vector_register_set,
 254     invalid_tmm_register_set,
 255     invalid_dest_and_src_register_set,
 256     invalid_dest_register_set,
 257     invalid_pseudo_prefix,
 258     unsupported_vector_index_register,
 259     unsupported_broadcast,
 260     broadcast_needed,
 261     unsupported_masking,
 262     mask_not_on_destination,
 263     no_default_mask,
 264     unsupported_rc_sae,
 265     unsupported_vector_size,
 266     unsupported_rsp_register,
 267     internal_error,
 268   };
 269
 270 struct _i386_insn
 271   {
 272     /* TM holds the template for the insn we're currently assembling.  */
 273     insn_template tm;
 274
 275     /* SUFFIX holds the instruction size suffix for byte, word, dword
 276        or qword, if given.  */
 277     char suffix;
 278
 279     /* OPCODE_LENGTH holds the number of base opcode bytes.  */
 280     unsigned char opcode_length;
 281
 282     /* OPERANDS gives the number of given operands.  */
 283     unsigned int operands;
 284
 285     /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
 286        of given register, displacement, memory operands and immediate
 287        operands.  */
 288     unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
 289
 290     /* TYPES [i] is the type (see above #defines) which tells us how to
 291        use OP[i] for the corresponding operand.  */
 292     i386_operand_type types[MAX_OPERANDS];
 293
 294     /* Displacement expression, immediate expression, or register for each
 295        operand.  */
 296     union i386_op op[MAX_OPERANDS];
 297
 298     /* Flags for operands.  */
 299     unsigned int flags[MAX_OPERANDS];
 300 #define Operand_PCrel 1
 301 #define Operand_Mem   2
 302 #define Operand_Signed 4 /* .insn only */
 303
 304     /* Relocation type for operand */
 305     enum bfd_reloc_code_real reloc[MAX_OPERANDS];
 306
 307     /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
 308        the base index byte below.  */
 309     const reg_entry *base_reg;
 310     const reg_entry *index_reg;
 311     unsigned int log2_scale_factor;
 312
 313     /* SEG gives the seg_entries of this insn.  They are zero unless
 314        explicit segment overrides are given.  */
 315     const reg_entry *seg[2];
 316
 317     /* PREFIX holds all the given prefix opcodes (usually null).
 318        PREFIXES is the number of prefix opcodes.  */
 319     unsigned int prefixes;
 320     unsigned char prefix[MAX_PREFIXES];
 321
 322     /* .insn allows for reserved opcode spaces.  */
 323     unsigned char insn_opcode_space;
 324
 325     /* .insn also allows (requires) specifying immediate size.  */
 326     unsigned char imm_bits[MAX_OPERANDS];
 327
 328     /* Register is in low 3 bits of opcode.  */
 329     bool short_form;
 330
 331     /* The operand to a branch insn indicates an absolute branch.  */
 332     bool jumpabsolute;
 333
 334     /* The operand to a branch insn indicates a far branch.  */
 335     bool far_branch;
 336
 337     /* There is a memory operand of (%dx) which should be only used
 338        with input/output instructions.  */
 339     bool input_output_operand;
 340
 341     /* Extended states.  */
 342     enum
 343       {
 344         /* Use MMX state.  */
 345         xstate_mmx = 1 << 0,
 346         /* Use XMM state.  */
 347         xstate_xmm = 1 << 1,
 348         /* Use YMM state.  */
 349         xstate_ymm = 1 << 2 | xstate_xmm,
 350         /* Use ZMM state.  */
 351         xstate_zmm = 1 << 3 | xstate_ymm,
 352         /* Use TMM state.  */
 353         xstate_tmm = 1 << 4,
 354         /* Use MASK state.  */
 355         xstate_mask = 1 << 5
 356       } xstate;
 357
 358     /* Has GOTPC or TLS relocation.  */
 359     bool has_gotpc_tls_reloc;
 360
 361     /* RM and SIB are the modrm byte and the sib byte where the
 362        addressing modes of this insn are encoded.  */
 363     modrm_byte rm;
 364     rex_byte rex;
 365     rex_byte vrex;
 366     rex_byte rex2;
 367     sib_byte sib;
 368     vex_prefix vex;
 369
 370     /* Masking attributes.
 371
 372        The struct describes masking, applied to OPERAND in the instruction.
 373        REG is a pointer to the corresponding mask register.  ZEROING tells
 374        whether merging or zeroing mask is used.  */
 375     struct Mask_Operation
 376     {
 377       const reg_entry *reg;
 378       unsigned int zeroing;
 379       /* The operand where this operation is associated.  */
 380       unsigned int operand;
 381     } mask;
 382
 383     /* Rounding control and SAE attributes.  */
 384     struct RC_Operation
 385     {
 386       enum rc_type
 387         {
 388           rc_none = -1,
 389           rne,
 390           rd,
 391           ru,
 392           rz,
 393           saeonly
 394         } type;
 395       /* In Intel syntax the operand modifier form is supposed to be used, but
 396          we continue to accept the immediate forms as well.  */
 397       bool modifier;
 398     } rounding;
 399
 400     /* Broadcasting attributes.
 401
 402        The struct describes broadcasting, applied to OPERAND.  TYPE
 403        expresses the broadcast factor.  */
 404     struct Broadcast_Operation
 405     {
 406       /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}.  */
 407       unsigned int type;
 408
 409       /* Index of broadcasted operand.  */
 410       unsigned int operand;
 411
 412       /* Number of bytes to broadcast.  */
 413       unsigned int bytes;
 414     } broadcast;
 415
 416     /* Compressed disp8*N attribute.  */
 417     unsigned int memshift;
 418
 419     /* Prefer load or store in encoding.  */
 420     enum
 421       {
 422         dir_encoding_default = 0,
 423         dir_encoding_load,
 424         dir_encoding_store,
 425         dir_encoding_swap
 426       } dir_encoding;
 427
 428     /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
 429     enum
 430       {
 431         disp_encoding_default = 0,
 432         disp_encoding_8bit,
 433         disp_encoding_16bit,
 434         disp_encoding_32bit
 435       } disp_encoding;
 436
 437     /* Prefer the REX byte in encoding.  */
 438     bool rex_encoding;
 439
 440     /* Prefer the REX2 prefix in encoding.  */
 441     bool rex2_encoding;
 442
 443     /* No CSPAZO flags update.  */
 444     bool has_nf;
 445
 446     /* Disable instruction size optimization.  */
 447     bool no_optimize;
 448
 449     /* How to encode instructions.  */
 450     enum
 451       {
 452         encoding_default = 0,
 453         encoding_vex,
 454         encoding_vex3,
 455         encoding_egpr, /* REX2 or EVEX.  */
 456         encoding_evex,
 457         encoding_evex512,
 458         encoding_error
 459       } encoding;
 460
 461     /* REP prefix.  */
 462     const char *rep_prefix;
 463
 464     /* HLE prefix.  */
 465     const char *hle_prefix;
 466
 467     /* Have BND prefix.  */
 468     const char *bnd_prefix;
 469
 470     /* Have NOTRACK prefix.  */
 471     const char *notrack_prefix;
 472
 473     /* Error code (an enum i386_error value).  */
 474     enum i386_error error;
 475   };
 476
 477 typedef struct _i386_insn i386_insn;
 478
 479 /* Link RC type with corresponding string, that'll be looked for in
 480    asm.  */
 481 struct RC_name
 482 {
 483   enum rc_type type;
 484   const char *name;
 485   unsigned int len;
 486 };
 487
 488 static const struct RC_name RC_NamesTable[] =
 489 {
 490   {  rne, STRING_COMMA_LEN ("rn-sae") },
 491   {  rd,  STRING_COMMA_LEN ("rd-sae") },
 492   {  ru,  STRING_COMMA_LEN ("ru-sae") },
 493   {  rz,  STRING_COMMA_LEN ("rz-sae") },
 494   {  saeonly,  STRING_COMMA_LEN ("sae") },
 495 };
 496
 497 /* To be indexed by segment register number.  */
 498 static const unsigned char i386_seg_prefixes[] = {
 499   ES_PREFIX_OPCODE,
 500   CS_PREFIX_OPCODE,
 501   SS_PREFIX_OPCODE,
 502   DS_PREFIX_OPCODE,
 503   FS_PREFIX_OPCODE,
 504   GS_PREFIX_OPCODE
 505 };
 506
 507 /* List of chars besides those in app.c:symbol_chars that can start an
 508    operand.  Used to prevent the scrubber eating vital white-space.  */
 509 const char extra_symbol_chars[] = "*%-([{}"
 510 #ifdef LEX_AT
 511         "@"
 512 #endif
 513 #ifdef LEX_QM
 514         "?"
 515 #endif
 516         ;
 517
 518 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF))     \
 519      && !defined (TE_GNU)                               \
 520      && !defined (TE_LINUX)                             \
 521      && !defined (TE_Haiku)                             \
 522      && !defined (TE_FreeBSD)                           \
 523      && !defined (TE_DragonFly)                         \
 524      && !defined (TE_NetBSD))
 525 /* This array holds the chars that always start a comment.  If the
 526    pre-processor is disabled, these aren't very useful.  The option
 527    --divide will remove '/' from this list.  */
 528 const char *i386_comment_chars = "#/";
 529 #define SVR4_COMMENT_CHARS 1
 530 #define PREFIX_SEPARATOR '\\'
 531
 532 #else
 533 const char *i386_comment_chars = "#";
 534 #define PREFIX_SEPARATOR '/'
 535 #endif
 536
 537 /* This array holds the chars that only start a comment at the beginning of
 538    a line.  If the line seems to have the form '# 123 filename'
 539    .line and .file directives will appear in the pre-processed output.
 540    Note that input_file.c hand checks for '#' at the beginning of the
 541    first line of the input file.  This is because the compiler outputs
 542    #NO_APP at the beginning of its output.
 543    Also note that comments started like this one will always work if
 544    '/' isn't otherwise defined.  */
 545 const char line_comment_chars[] = "#/";
 546
 547 const char line_separator_chars[] = ";";
 548
 549 /* Chars that can be used to separate mant from exp in floating point
 550    nums.  */
 551 const char EXP_CHARS[] = "eE";
 552
 553 /* Chars that mean this number is a floating point constant
 554    As in 0f12.456
 555    or    0d1.2345e12.  */
 556 const char FLT_CHARS[] = "fFdDxXhHbB";
 557
 558 /* Tables for lexical analysis.  */
 559 static char mnemonic_chars[256];
 560 static char register_chars[256];
 561 static char operand_chars[256];
 562
 563 /* Lexical macros.  */
 564 #define is_operand_char(x) (operand_chars[(unsigned char) x])
 565 #define is_register_char(x) (register_chars[(unsigned char) x])
 566 #define is_space_char(x) ((x) == ' ')
 567
 568 /* All non-digit non-letter characters that may occur in an operand and
 569    which aren't already in extra_symbol_chars[].  */
 570 static const char operand_special_chars[] = "$+,)._~/<>|&^!=:@]";
 571
 572 /* md_assemble() always leaves the strings it's passed unaltered.  To
 573    effect this we maintain a stack of saved characters that we've smashed
 574    with '\0's (indicating end of strings for various sub-fields of the
 575    assembler instruction).  */
 576 static char save_stack[32];
 577 static char *save_stack_p;
 578 #define END_STRING_AND_SAVE(s) \
 579         do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
 580 #define RESTORE_END_STRING(s) \
 581         do { *(s) = *--save_stack_p; } while (0)
 582
 583 /* The instruction we're assembling.  */
 584 static i386_insn i;
 585
 586 /* Possible templates for current insn.  */
 587 static templates current_templates;
 588
 589 /* Per instruction expressionS buffers: max displacements & immediates.  */
 590 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
 591 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
 592
 593 /* Current operand we are working on.  */
 594 static int this_operand = -1;
 595
 596 /* Are we processing a .insn directive?  */
 597 #define dot_insn() (i.tm.mnem_off == MN__insn)
 598
 599 enum i386_flag_code i386_flag_code;
 600 #define flag_code i386_flag_code /* Permit to continue using original name.  */
 601 static unsigned int object_64bit;
 602 static unsigned int disallow_64bit_reloc;
 603 static int use_rela_relocations = 0;
 604 /* __tls_get_addr/___tls_get_addr symbol for TLS.  */
 605 static const char *tls_get_addr;
 606
 607 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 608
 609 /* The ELF ABI to use.  */
 610 enum x86_elf_abi
 611 {
 612   I386_ABI,
 613   X86_64_ABI,
 614   X86_64_X32_ABI
 615 };
 616
 617 static enum x86_elf_abi x86_elf_abi = I386_ABI;
 618 #endif
 619
 620 #if defined (TE_PE) || defined (TE_PEP)
 621 /* Use big object file format.  */
 622 static int use_big_obj = 0;
 623 #endif
 624
 625 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 626 /* 1 if generating code for a shared library.  */
 627 static int shared = 0;
 628
 629 unsigned int x86_sframe_cfa_sp_reg;
 630 /* The other CFA base register for SFrame stack trace info.  */
 631 unsigned int x86_sframe_cfa_fp_reg;
 632 unsigned int x86_sframe_cfa_ra_reg;
 633
 634 #endif
 635
 636 /* 1 for intel syntax,
 637    0 if att syntax.  */
 638 static int intel_syntax = 0;
 639
 640 static enum x86_64_isa /* x86-64 ISA variants.  */
 641 {
 642   amd64 = 1,    /* AMD64 ISA.  */
 643   intel64       /* Intel64 ISA.  */
 644 } isa64;        /* Zero-initialized, i.e. neither enumerator, by default.  */
 645
 646 /* 1 for intel mnemonic,
 647    0 if att mnemonic.  */
 648 static int intel_mnemonic = !SYSV386_COMPAT;
 649
 650 /* 1 if pseudo registers are permitted.  */
 651 static int allow_pseudo_reg = 0;
 652
 653 /* 1 if register prefix % not required.  */
 654 static int allow_naked_reg = 0;
 655
 656 /* 1 if the assembler should add BND prefix for all control-transferring
 657    instructions supporting it, even if this prefix wasn't specified
 658    explicitly.  */
 659 static int add_bnd_prefix = 0;
 660
 661 /* 1 if pseudo index register, eiz/riz, is allowed.  */
 662 static int allow_index_reg = 0;
 663
 664 /* 1 if the assembler should ignore LOCK prefix, even if it was
 665    specified explicitly.  */
 666 static int omit_lock_prefix = 0;
 667
 668 /* 1 if the assembler should encode lfence, mfence, and sfence as
 669    "lock addl $0, (%{re}sp)".  */
 670 static int avoid_fence = 0;
 671
 672 /* 1 if lfence should be inserted after every load.  */
 673 static int lfence_after_load = 0;
 674
 675 /* Non-zero if lfence should be inserted before indirect branch.  */
 676 static enum lfence_before_indirect_branch_kind
 677   {
 678     lfence_branch_none = 0,
 679     lfence_branch_register,
 680     lfence_branch_memory,
 681     lfence_branch_all
 682   }
 683 lfence_before_indirect_branch;
 684
 685 /* Non-zero if lfence should be inserted before ret.  */
 686 static enum lfence_before_ret_kind
 687   {
 688     lfence_before_ret_none = 0,
 689     lfence_before_ret_not,
 690     lfence_before_ret_or,
 691     lfence_before_ret_shl
 692   }
 693 lfence_before_ret;
 694
 695 /* 1 if the assembler should generate relax relocations.  */
 696
 697 static int generate_relax_relocations
 698   = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
 699
 700 static enum check_kind /* Levels used by sse_check and operand_check.  */
 701   {
 702     check_none = 0,     /* also the zero-initialized default */
 703     check_warning,
 704     check_error
 705   }
 706 sse_check, operand_check = check_warning; /* sse_check starts as check_none.  */
 707
 708 /* Non-zero if branches should be aligned within power of 2 boundary.  */
 709 static int align_branch_power = 0;
 710
 711 /* Types of branches to align.  */
 712 enum align_branch_kind
 713   {
 714     align_branch_none = 0,
 715     align_branch_jcc = 1,
 716     align_branch_fused = 2,
 717     align_branch_jmp = 3,
 718     align_branch_call = 4,
 719     align_branch_indirect = 5,
 720     align_branch_ret = 6
 721   };
 722
 723 /* Type bits of branches to align.  */
 724 enum align_branch_bit
 725   {
 726     align_branch_jcc_bit = 1 << align_branch_jcc,
 727     align_branch_fused_bit = 1 << align_branch_fused,
 728     align_branch_jmp_bit = 1 << align_branch_jmp,
 729     align_branch_call_bit = 1 << align_branch_call,
 730     align_branch_indirect_bit = 1 << align_branch_indirect,
 731     align_branch_ret_bit = 1 << align_branch_ret
 732   };
 733
 734 static unsigned int align_branch = (align_branch_jcc_bit
 735                                     | align_branch_fused_bit
 736                                     | align_branch_jmp_bit);
 737
 738 /* Types of conditional jump used by macro-fusion.  */
 739 enum mf_jcc_kind
 740   {
 741     mf_jcc_jo = 0,  /* base opcode 0x70  */
 742     mf_jcc_jc,      /* base opcode 0x72  */
 743     mf_jcc_je,      /* base opcode 0x74  */
 744     mf_jcc_jna,     /* base opcode 0x76  */
 745     mf_jcc_js,      /* base opcode 0x78  */
 746     mf_jcc_jp,      /* base opcode 0x7a  */
 747     mf_jcc_jl,      /* base opcode 0x7c  */
 748     mf_jcc_jle,     /* base opcode 0x7e  */
 749   };
 750
 751 /* Types of compare flag-modifying instructions used by macro-fusion.  */
 752 enum mf_cmp_kind
 753   {
 754     mf_cmp_test_and,  /* test/cmp */
 755     mf_cmp_alu_cmp,  /* add/sub/cmp */
 756     mf_cmp_incdec  /* inc/dec */
 757   };
 758
 759 /* The maximum padding size for fused jcc.  CMP like instruction can
 760    be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
 761    prefixes.   */
 762 #define MAX_FUSED_JCC_PADDING_SIZE 20
 763
 764 /* The maximum number of prefixes added for an instruction.  */
 765 static unsigned int align_branch_prefix_size = 5;
 766
 767 /* Optimization:
 768    1. Clear the REX_W bit with register operand if possible.
 769    2. Above plus use 128bit vector instruction to clear the full vector
 770       register.
 771  */
 772 static int optimize = 0;
 773
 774 /* Optimization:
 775    1. Clear the REX_W bit with register operand if possible.
 776    2. Above plus use 128bit vector instruction to clear the full vector
 777       register.
 778    3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
 779       "testb $imm7,%r8".
 780  */
 781 static int optimize_for_space = 0;
 782
 783 /* Register prefix used for error message.  */
 784 static const char *register_prefix = "%";
 785
 786 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
 787    leave, push, and pop instructions so that gcc has the same stack
 788    frame as in 32 bit mode.  */
 789 static char stackop_size = '\0';
 790
 791 /* Non-zero to optimize code alignment.  */
 792 int optimize_align_code = 1;
 793
 794 /* Non-zero to quieten some warnings.  */
 795 static int quiet_warnings = 0;
 796
 797 /* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs.  */
 798 static bool pre_386_16bit_warned;
 799
 800 /* CPU name.  */
 801 static const char *cpu_arch_name = NULL;
 802 static char *cpu_sub_arch_name = NULL;
 803
 804 /* CPU feature flags.  */
 805 i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
 806
 807 /* ISA extensions available in 64-bit mode only.  */
 808 static const i386_cpu_flags cpu_64_flags = CPU_ANY_64_FLAGS;
 809
 810 /* If we have selected a cpu we are generating instructions for.  */
 811 static int cpu_arch_tune_set = 0;
 812
 813 /* Cpu we are generating instructions for.  */
 814 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
 815
 816 /* CPU instruction set architecture used.  */
 817 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
 818
 819 /* CPU feature flags of instruction set architecture used.  */
 820 i386_cpu_flags cpu_arch_isa_flags;
 821
 822 /* If set, conditional jumps are not automatically promoted to handle
 823    larger than a byte offset.  */
 824 static bool no_cond_jump_promotion = false;
 825
 826 /* This will be set from an expression parser hook if there's any
 827    applicable operator involved in an expression.  */
 828 static enum {
 829   expr_operator_none,
 830   expr_operator_present,
 831   expr_large_value,
 832 } expr_mode;
 833
 834 /* Encode SSE instructions with VEX prefix.  */
 835 static unsigned int sse2avx;
 836
 837 /* Encode aligned vector move as unaligned vector move.  */
 838 static unsigned int use_unaligned_vector_move;
 839
 840 /* Maximum permitted vector size. */
 841 #define VSZ128 0
 842 #define VSZ256 1
 843 #define VSZ512 2
 844 #define VSZ_DEFAULT VSZ512
 845 static unsigned int vector_size = VSZ_DEFAULT;
 846
 847 /* Encode scalar AVX instructions with specific vector length.  */
 848 static enum
 849   {
 850     vex128 = 0,
 851     vex256
 852   } avxscalar;
 853
 854 /* Encode VEX WIG instructions with specific vex.w.  */
 855 static enum
 856   {
 857     vexw0 = 0,
 858     vexw1
 859   } vexwig;
 860
 861 /* Encode scalar EVEX LIG instructions with specific vector length.  */
 862 static enum
 863   {
 864     evexl128 = 0,
 865     evexl256,
 866     evexl512
 867   } evexlig;
 868
 869 /* Encode EVEX WIG instructions with specific evex.w.  */
 870 static enum
 871   {
 872     evexw0 = 0,
 873     evexw1
 874   } evexwig;
 875
 876 /* Value to encode in EVEX RC bits, for SAE-only instructions.  */
 877 static enum rc_type evexrcig = rne;
 878
 879 /* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
 880 static symbolS *GOT_symbol;
 881
 882 /* The dwarf2 return column, adjusted for 32 or 64 bit.  */
 883 unsigned int x86_dwarf2_return_column;
 884
 885 /* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
 886 int x86_cie_data_alignment;
 887
 888 /* Interface to relax_segment.
 889    There are 3 major relax states for 386 jump insns because the
 890    different types of jumps add different sizes to frags when we're
 891    figuring out what sort of jump to choose to reach a given label.
 892
 893    BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
 894    branches which are handled by md_estimate_size_before_relax() and
 895    i386_generic_table_relax_frag().  */
 896
 897 /* Types.  */
 898 #define UNCOND_JUMP 0
 899 #define COND_JUMP 1
 900 #define COND_JUMP86 2
 901 #define BRANCH_PADDING 3
 902 #define BRANCH_PREFIX 4
 903 #define FUSED_JCC_PADDING 5
 904
 905 /* Sizes.  */
 906 #define CODE16  1
 907 #define SMALL   0
 908 #define SMALL16 (SMALL | CODE16)
 909 #define BIG     2
 910 #define BIG16   (BIG | CODE16)
 911
 912 #ifndef INLINE
 913 #ifdef __GNUC__
 914 #define INLINE __inline__
 915 #else
 916 #define INLINE
 917 #endif
 918 #endif
 919
 920 #define ENCODE_RELAX_STATE(type, size) \
 921   ((relax_substateT) (((type) << 2) | (size))) /* type in bits 2 and up, size in bits 0-1 */
 922 #define TYPE_FROM_RELAX_STATE(s) \
 923   ((s) >> 2) /* undoes ENCODE_RELAX_STATE: recovers the type */
 924 #define DISP_SIZE_FROM_RELAX_STATE(s) \
 925     ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1))) /* 4 for BIG, 2 for BIG16, else 1 */
 926
/* This table is used by relax_frag to promote short jumps to long
   ones where necessary.  SMALL (short) jumps may be promoted to BIG
   (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
   don't allow a short jump in a 32 bit code segment to be promoted to
   a 16 bit offset jump because it's slower (requires data size
   prefix), and doesn't work, unless the destination is in the bottom
   64k of the code segment (The top 16 bits of eip are zeroed).

   Entries are indexed by ENCODE_RELAX_STATE (type, size), so each jump
   type owns four consecutive slots in the order SMALL, SMALL16, BIG,
   BIG16.  */

const relax_typeS md_relax_table[] =
{
  /* The fields are:
     1) most positive reach of this state,
     2) most negative reach of this state,
     3) how many bytes this mode will have in the variable part of the frag
     4) which index into the table to try if we can't fit into this one.  */

  /* UNCOND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
  /* dword jmp adds 4 bytes to frag:
     0 extra opcode bytes, 4 displacement bytes.  */
  {0, 0, 4, 0},
  /* word jmp adds 2 bytes to frag:
     0 extra opcode bytes, 2 displacement bytes.  */
  {0, 0, 2, 0},

  /* COND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
  /* dword conditionals add 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 3 bytes to frag:
     1 extra opcode byte, 2 displacement bytes.  */
  {0, 0, 3, 0},

  /* COND_JUMP86 states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
  /* dword conditionals add 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 4 bytes to frag:
     1 displacement byte and a 3 byte long branch insn.  */
  {0, 0, 4, 0}
};
 973
/* Initializer helpers for the entries of cpu_arch[] below.  Each one
   starts with the stringified name N followed by its length (via
   STRING_COMMA_LEN).

   ARCH (n, t, f, s): flag S, processor type PROCESSOR_<t>, vsz_none,
   then CPU_<f>_FLAGS and CPU_NONE_FLAGS as the two flag sets.  */
#define ARCH(n, t, f, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, vsz_none, CPU_ ## f ## _FLAGS, \
    CPU_NONE_FLAGS }
/* SUBARCH (n, e, d, s): flag S, PROCESSOR_NONE, vsz_none, then
   CPU_<e>_FLAGS and CPU_<d>_FLAGS (presumably the flag sets to enable
   and to disable, respectively -- confirm against arch_entry).  */
#define SUBARCH(n, e, d, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, vsz_none, CPU_ ## e ## _FLAGS, \
    CPU_ ## d ## _FLAGS }
/* VECARCH (n, e, d, v): like SUBARCH with the flag fixed to false, but
   with vsz_<v> in place of vsz_none.  */
#define VECARCH(n, e, d, v) \
  { STRING_COMMA_LEN (#n), false, PROCESSOR_NONE, vsz_ ## v, \
    CPU_ ## e ## _FLAGS, CPU_ ## d ## _FLAGS }
 983
/* Table of known architecture and ISA extension names.  Processor
   entries (ARCH) come first, followed by extension entries (SUBARCH
   and VECARCH).  See the ARCH, SUBARCH and VECARCH macros for how each
   initializer is laid out.  */
static const arch_entry cpu_arch[] =
{
  /* Do not replace the first two entries - i386_target_format() and
     set_cpu_arch() rely on them being there in this order.  */
  ARCH (generic32, GENERIC32, GENERIC32, false),
  ARCH (generic64, GENERIC64, GENERIC64, false),
  ARCH (i8086, UNKNOWN, NONE, false),
  ARCH (i186, UNKNOWN, 186, false),
  ARCH (i286, UNKNOWN, 286, false),
  ARCH (i386, I386, 386, false),
  ARCH (i486, I486, 486, false),
  ARCH (i586, PENTIUM, 586, false),
  ARCH (pentium, PENTIUM, 586, false),
  ARCH (i686, I686, 686, false),
  ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
  ARCH (pentiumii, PENTIUMPRO, P2, false),
  ARCH (pentiumiii, PENTIUMPRO, P3, false),
  ARCH (pentium4, PENTIUM4, P4, false),
  ARCH (prescott, NOCONA, CORE, false),
  ARCH (nocona, NOCONA, NOCONA, false),
  ARCH (yonah, CORE, CORE, true),
  ARCH (core, CORE, CORE, false),
  ARCH (merom, CORE2, CORE2, true),
  ARCH (core2, CORE2, CORE2, false),
  ARCH (corei7, COREI7, COREI7, false),
  ARCH (iamcu, IAMCU, IAMCU, false),
  ARCH (k6, K6, K6, false),
  ARCH (k6_2, K6, K6_2, false),
  ARCH (athlon, ATHLON, ATHLON, false),
  ARCH (sledgehammer, K8, K8, true),
  ARCH (opteron, K8, K8, false),
  ARCH (k8, K8, K8, false),
  ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
  ARCH (bdver1, BD, BDVER1, false),
  ARCH (bdver2, BD, BDVER2, false),
  ARCH (bdver3, BD, BDVER3, false),
  ARCH (bdver4, BD, BDVER4, false),
  ARCH (znver1, ZNVER, ZNVER1, false),
  ARCH (znver2, ZNVER, ZNVER2, false),
  ARCH (znver3, ZNVER, ZNVER3, false),
  ARCH (znver4, ZNVER, ZNVER4, false),
  ARCH (znver5, ZNVER, ZNVER5, false),
  ARCH (btver1, BT, BTVER1, false),
  ARCH (btver2, BT, BTVER2, false),

  SUBARCH (8087, 8087, ANY_8087, false),
  SUBARCH (87, NONE, ANY_8087, false), /* Disable only!  */
  SUBARCH (287, 287, ANY_287, false),
  SUBARCH (387, 387, ANY_387, false),
  SUBARCH (687, 687, ANY_687, false),
  SUBARCH (cmov, CMOV, CMOV, false),
  SUBARCH (fxsr, FXSR, ANY_FXSR, false),
  SUBARCH (mmx, MMX, ANY_MMX, false),
  SUBARCH (sse, SSE, ANY_SSE, false),
  SUBARCH (sse2, SSE2, ANY_SSE2, false),
  SUBARCH (sse3, SSE3, ANY_SSE3, false),
  SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
  SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
  SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
  SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
  SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
  VECARCH (avx, AVX, ANY_AVX, reset),
  VECARCH (avx2, AVX2, ANY_AVX2, reset),
  VECARCH (avx512f, AVX512F, ANY_AVX512F, reset),
  VECARCH (avx512cd, AVX512CD, ANY_AVX512CD, reset),
  VECARCH (avx512er, AVX512ER, ANY_AVX512ER, reset),
  VECARCH (avx512pf, AVX512PF, ANY_AVX512PF, reset),
  VECARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, reset),
  VECARCH (avx512bw, AVX512BW, ANY_AVX512BW, reset),
  VECARCH (avx512vl, AVX512VL, ANY_AVX512VL, reset),
  SUBARCH (monitor, MONITOR, MONITOR, false),
  SUBARCH (vmx, VMX, ANY_VMX, false),
  SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
  SUBARCH (smx, SMX, SMX, false),
  SUBARCH (xsave, XSAVE, ANY_XSAVE, false),
  SUBARCH (xsaveopt, XSAVEOPT, ANY_XSAVEOPT, false),
  SUBARCH (xsavec, XSAVEC, ANY_XSAVEC, false),
  SUBARCH (xsaves, XSAVES, ANY_XSAVES, false),
  SUBARCH (aes, AES, ANY_AES, false),
  SUBARCH (pclmul, PCLMULQDQ, ANY_PCLMULQDQ, false),
  SUBARCH (clmul, PCLMULQDQ, ANY_PCLMULQDQ, true),
  SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
  SUBARCH (rdrnd, RDRND, RDRND, false),
  SUBARCH (f16c, F16C, ANY_F16C, false),
  SUBARCH (bmi2, BMI2, BMI2, false),
  SUBARCH (fma, FMA, ANY_FMA, false),
  SUBARCH (fma4, FMA4, ANY_FMA4, false),
  SUBARCH (xop, XOP, ANY_XOP, false),
  SUBARCH (lwp, LWP, ANY_LWP, false),
  SUBARCH (movbe, MOVBE, MOVBE, false),
  SUBARCH (cx16, CX16, CX16, false),
  SUBARCH (lahf_sahf, LAHF_SAHF, LAHF_SAHF, false),
  SUBARCH (ept, EPT, ANY_EPT, false),
  SUBARCH (lzcnt, LZCNT, LZCNT, false),
  SUBARCH (popcnt, POPCNT, POPCNT, false),
  SUBARCH (hle, HLE, HLE, false),
  SUBARCH (rtm, RTM, ANY_RTM, false),
  SUBARCH (tsx, TSX, TSX, false),
  SUBARCH (invpcid, INVPCID, INVPCID, false),
  SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
  SUBARCH (nop, NOP, NOP, false),
  SUBARCH (syscall, SYSCALL, SYSCALL, false),
  SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
  SUBARCH (3dnow, 3DNOW, ANY_3DNOW, false),
  SUBARCH (3dnowa, 3DNOWA, ANY_3DNOWA, false),
  SUBARCH (padlock, PADLOCK, PADLOCK, false),
  SUBARCH (pacifica, SVME, ANY_SVME, true),
  SUBARCH (svme, SVME, ANY_SVME, false),
  SUBARCH (abm, ABM, ABM, false),
  SUBARCH (bmi, BMI, BMI, false),
  SUBARCH (tbm, TBM, TBM, false),
  SUBARCH (adx, ADX, ADX, false),
  SUBARCH (rdseed, RDSEED, RDSEED, false),
  SUBARCH (prfchw, PRFCHW, PRFCHW, false),
  SUBARCH (smap, SMAP, SMAP, false),
  SUBARCH (mpx, MPX, ANY_MPX, false),
  SUBARCH (sha, SHA, ANY_SHA, false),
  SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
  SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
  SUBARCH (se1, SE1, SE1, false),
  SUBARCH (clwb, CLWB, CLWB, false),
  VECARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, reset),
  VECARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, reset),
  VECARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, reset),
  VECARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, reset),
  VECARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, reset),
  VECARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, reset),
  VECARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, reset),
  VECARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, reset),
  VECARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, reset),
  SUBARCH (clzero, CLZERO, CLZERO, false),
  SUBARCH (mwaitx, MWAITX, MWAITX, false),
  SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
  SUBARCH (rdpid, RDPID, RDPID, false),
  SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
  SUBARCH (ibt, IBT, IBT, false),
  SUBARCH (shstk, SHSTK, SHSTK, false),
  SUBARCH (gfni, GFNI, ANY_GFNI, false),
  VECARCH (vaes, VAES, ANY_VAES, reset),
  VECARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, reset),
  SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
  SUBARCH (pconfig, PCONFIG, PCONFIG, false),
  SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
  SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
  SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
  SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
  SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
  SUBARCH (amx_complex, AMX_COMPLEX, ANY_AMX_COMPLEX, false),
  SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
  SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
  SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
  VECARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, reset),
  VECARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
           ANY_AVX512_VP2INTERSECT, reset),
  SUBARCH (tdx, TDX, TDX, false),
  SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
  SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
  SUBARCH (rdpru, RDPRU, RDPRU, false),
  SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
  SUBARCH (sev_es, SEV_ES, ANY_SEV_ES, false),
  SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
  SUBARCH (kl, KL, ANY_KL, false),
  SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
  SUBARCH (uintr, UINTR, UINTR, false),
  SUBARCH (hreset, HRESET, HRESET, false),
  VECARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, reset),
  SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
  VECARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, reset),
  VECARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, reset),
  SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
  SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
  SUBARCH (msrlist, MSRLIST, MSRLIST, false),
  VECARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, reset),
  SUBARCH (rao_int, RAO_INT, RAO_INT, false),
  SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
  SUBARCH (fred, FRED, ANY_FRED, false),
  SUBARCH (lkgs, LKGS, ANY_LKGS, false),
  VECARCH (avx_vnni_int16, AVX_VNNI_INT16, ANY_AVX_VNNI_INT16, reset),
  VECARCH (sha512, SHA512, ANY_SHA512, reset),
  VECARCH (sm3, SM3, ANY_SM3, reset),
  VECARCH (sm4, SM4, ANY_SM4, reset),
  SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
  VECARCH (avx10.1, AVX10_1, ANY_AVX512F, set),
  SUBARCH (user_msr, USER_MSR, USER_MSR, false),
  SUBARCH (apx_f, APX_F, APX_F, false),
};

/* The ARCH and SUBARCH helpers are not needed past this point; VECARCH
   is not undefined here.  */
#undef SUBARCH
#undef ARCH
1173
1174 #ifdef I386COFF
1175 /* Like s_lcomm_internal in gas/read.c but the alignment string
1176    is allowed to be optional.  */
1177
1178 static symbolS *
1179 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1180 {
1181   addressT align = 0;
1182
1183   SKIP_WHITESPACE ();
1184
1185   if (needs_align
1186       && *input_line_pointer == ',')
1187     {
1188       align = parse_align (needs_align - 1);
1189
1190       if (align == (addressT) -1)
1191         return NULL;
1192     }
1193   else
1194     {
1195       if (size >= 8)
1196         align = 3;
1197       else if (size >= 4)
1198         align = 2;
1199       else if (size >= 2)
1200         align = 1;
1201       else
1202         align = 0;
1203     }
1204
1205   bss_alloc (symbolP, size, align);
1206   return symbolP;
1207 }
1208
1209 static void
1210 pe_lcomm (int needs_align)
1211 {
1212   s_comm_internal (needs_align * 2, pe_lcomm_internal);
1213 }
1214 #endif
1215
/* Target-specific pseudo-op table.  Each entry consists of a directive
   name, a handler function and an integer (presumably passed to the
   handler -- confirm against pseudo_typeS).  Several entries depend on
   the object format / target macros tested below.  The table ends with
   an all-zero entry.  */
const pseudo_typeS md_pseudo_table[] =
{
  /* "align" takes a byte count unless OBJ_AOUT or USE_ALIGN_PTWO is
     defined, in which case the power-of-two handler is used.  */
#if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
  {"align", s_align_bytes, 0},
#else
  {"align", s_align_ptwo, 0},
#endif
  {"arch", set_cpu_arch, 0},
#ifdef OBJ_AOUT
  {"bss", s_bss, 0},
#endif
#ifdef I386COFF
  {"lcomm", pe_lcomm, 1},
#endif
  {"ffloat", float_cons, 'f'},
  {"dfloat", float_cons, 'd'},
  {"tfloat", float_cons, 'x'},
  {"hfloat", float_cons, 'h'},
  {"bfloat16", float_cons, 'b'},
  {"value", cons, 2},
  {"slong", signed_cons, 4},
  {"insn", s_insn, 0},
  {"noopt", s_noopt, 0},
  {"optim", s_ignore, 0},
  {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
  {"code16", set_code_flag, CODE_16BIT},
  {"code32", set_code_flag, CODE_32BIT},
#ifdef BFD64
  {"code64", set_code_flag, CODE_64BIT},
#endif
  {"intel_syntax", set_intel_syntax, 1},
  {"att_syntax", set_intel_syntax, 0},
  {"intel_mnemonic", set_intel_mnemonic, 1},
  {"att_mnemonic", set_intel_mnemonic, 0},
  {"allow_index_reg", set_allow_index_reg, 1},
  {"disallow_index_reg", set_allow_index_reg, 0},
  {"sse_check", set_check, 0},
  {"operand_check", set_check, 1},
  /* ELF-capable builds get "largecomm"; all others get the dwarf2
     "file"/"loc" directives from this table instead.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"largecomm", handle_large_common, 0},
#else
  {"file", dwarf2_directive_file, 0},
  {"loc", dwarf2_directive_loc, 0},
  {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
#endif
#ifdef TE_PE
  {"secrel32", pe_directive_secrel, 0},
  {"secidx", pe_directive_secidx, 0},
#endif
  {0, 0, 0}
};
1267
/* For interface with expression ().  */
extern char *input_line_pointer;

/* Hash table for instruction mnemonic lookup.  File-local; not
   initialized at this point.  */
static htab_t op_hash;

/* Hash table for register lookup.  File-local; not initialized at this
   point.  */
static htab_t reg_hash;
1276 \f
  /* Various efficient no-op patterns for aligning code labels.
     Note: Don't try to assemble the instructions in the comments.
     0L and 0w are not legal.

     Naming: <set>_<N> is an N-byte pattern.  Where <set>_<N> is a macro
     of the form (<set>_<M> + k), it aliases the tail of the longer
     array, i.e. the same bytes with the first k bytes dropped.  The
     *_patt tables are laid out so that entry [N - 1] is the N-byte
     pattern.  */
static const unsigned char f32_1[] =
  {0x90};                               /* nop                  */
static const unsigned char f32_2[] =
  {0x66,0x90};                          /* xchg %ax,%ax         */
static const unsigned char f32_3[] =
  {0x8d,0x76,0x00};                     /* leal 0(%esi),%esi    */
#define f32_4 (f32_5 + 1)       /* leal 0(%esi,%eiz),%esi */
static const unsigned char f32_5[] =
  {0x2e,0x8d,0x74,0x26,0x00};           /* leal %cs:0(%esi,%eiz),%esi   */
static const unsigned char f32_6[] =
  {0x8d,0xb6,0x00,0x00,0x00,0x00};      /* leal 0L(%esi),%esi   */
#define f32_7 (f32_8 + 1)       /* leal 0L(%esi,%eiz),%esi */
static const unsigned char f32_8[] =
  {0x2e,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal %cs:0L(%esi,%eiz),%esi */
static const unsigned char f64_3[] =
  {0x48,0x89,0xf6};                     /* mov %rsi,%rsi        */
static const unsigned char f64_4[] =
  {0x48,0x8d,0x76,0x00};                /* lea 0(%rsi),%rsi     */
#define f64_5 (f64_6 + 1)               /* lea 0(%rsi,%riz),%rsi        */
static const unsigned char f64_6[] =
  {0x2e,0x48,0x8d,0x74,0x26,0x00};      /* lea %cs:0(%rsi,%riz),%rsi    */
static const unsigned char f64_7[] =
  {0x48,0x8d,0xb6,0x00,0x00,0x00,0x00}; /* lea 0L(%rsi),%rsi    */
#define f64_8 (f64_9 + 1)               /* lea 0L(%rsi,%riz),%rsi */
static const unsigned char f64_9[] =
  {0x2e,0x48,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* lea %cs:0L(%rsi,%riz),%rsi */
/* The 2-byte 16-bit pattern reuses the last two bytes of f64_3.  */
#define f16_2 (f64_3 + 1)               /* mov %si,%si  */
static const unsigned char f16_3[] =
  {0x8d,0x74,0x00};                     /* lea 0(%si),%si       */
#define f16_4 (f16_5 + 1)               /* lea 0W(%si),%si */
static const unsigned char f16_5[] =
  {0x2e,0x8d,0xb4,0x00,0x00};           /* lea %cs:0W(%si),%si  */
/* Opcode bytes of the jumps used to skip over long runs of padding; the
   displacement is appended by the code emitting them.  */
static const unsigned char jump_disp8[] =
  {0xeb};                               /* jmp disp8           */
static const unsigned char jump32_disp32[] =
  {0xe9};                               /* jmp disp32          */
static const unsigned char jump16_disp32[] =
  {0x66,0xe9};                          /* 0x66 prefix + jmp disp32 */
/* 32-bit NOPs patterns.  */
static const unsigned char *const f32_patt[] = {
  f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8
};
/* 64-bit NOPs patterns.  */
static const unsigned char *const f64_patt[] = {
  f32_1, f32_2, f64_3, f64_4, f64_5, f64_6, f64_7, f64_8, f64_9
};
/* 16-bit NOPs patterns.  */
static const unsigned char *const f16_patt[] = {
  f32_1, f16_2, f16_3, f16_4, f16_5
};
/* nopl (%[re]ax) */
static const unsigned char alt_3[] =
  {0x0f,0x1f,0x00};
/* nopl 0(%[re]ax) */
static const unsigned char alt_4[] =
  {0x0f,0x1f,0x40,0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
#define alt_5 (alt_6 + 1)
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] =
  {0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopl 0L(%[re]ax) */
static const unsigned char alt_7[] =
  {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
#define alt_8 (alt_9 + 1)
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
  {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
#define alt_10 (alt_11 + 1)
/* data16 nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_11[] =
  {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* 32-bit and 64-bit NOPs patterns.  */
static const unsigned char *const alt_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt_9, alt_10, alt_11
};
/* The 9- to 14-byte 64-bit patterns are all tails of alt64_15, each
   one keeping one more leading prefix byte than the previous.  */
#define alt64_9 (alt64_15 + 6)          /* nopq 0L(%rax,%rax,1)  */
#define alt64_10 (alt64_15 + 5)         /* cs nopq 0L(%rax,%rax,1)  */
/* data16 cs nopq 0L(%rax,%rax,1)  */
#define alt64_11 (alt64_15 + 4)
/* data16 data16 cs nopq 0L(%rax,%rax,1)  */
#define alt64_12 (alt64_15 + 3)
/* data16 data16 data16 cs nopq 0L(%rax,%rax,1)  */
#define alt64_13 (alt64_15 + 2)
/* data16 data16 data16 data16 cs nopq 0L(%rax,%rax,1)  */
#define alt64_14 (alt64_15 + 1)
/* data16 data16 data16 data16 data16 cs nopq 0L(%rax,%rax,1)  */
static const unsigned char alt64_15[] =
  {0x66,0x66,0x66,0x66,0x66,0x2e,0x48,
   0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* Long 64-bit NOPs patterns.  */
static const unsigned char *const alt64_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt64_9, alt64_10, alt64_11,alt64_12, alt64_13, alt64_14, alt64_15
};
1378
/* Fill COUNT bytes at WHERE with NOPs taken from PATT, where
   PATT[N - 1] is an N-byte NOP.  No single NOP written is longer than
   MAX_SINGLE_NOP_SIZE bytes; a MAX_SINGLE_NOP_SIZE below 1 is reported
   through as_fatal.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
                  int count, int max_single_nop_size)
{
  const unsigned char *longest;
  int tail;
  int full;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
                max_single_nop_size);
      return;
    }

  longest = patt[max_single_nop_size - 1];
  tail = count % max_single_nop_size;

  /* The longest permitted NOP goes first, as often as it fits ...  */
  for (full = count - tail; full > 0; full -= max_single_nop_size)
    {
      memcpy (where, longest, max_single_nop_size);
      where += max_single_nop_size;
    }

  /* ... followed by a single shorter NOP for whatever remains.  */
  if (tail != 0)
    memcpy (where, patt[tail - 1], tail);
}
1412
1413 static INLINE int
1414 fits_in_imm7 (offsetT num)
1415 {
1416   return (num & 0x7f) == num;
1417 }
1418
1419 static INLINE int
1420 fits_in_imm31 (offsetT num)
1421 {
1422   return (num & 0x7fffffff) == num;
1423 }
1424
1425 /* Genenerate COUNT bytes of NOPs to WHERE with the maximum size of a
1426    single NOP instruction LIMIT.  */
1427
1428 void
1429 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1430 {
1431   const unsigned char *const *patt = NULL;
1432   int max_single_nop_size;
1433   /* Maximum number of NOPs before switching to jump over NOPs.  */
1434   int max_number_of_nops;
1435
1436   switch (fragP->fr_type)
1437     {
1438     case rs_fill_nop:
1439     case rs_align_code:
1440       break;
1441     case rs_machine_dependent:
1442       /* Allow NOP padding for jumps and calls.  */
1443       if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1444           || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1445         break;
1446       /* Fall through.  */
1447     default:
1448       return;
1449     }
1450
1451   /* We need to decide which NOP sequence to use for 32bit and
1452      64bit. When -mtune= is used:
1453
1454      1. For PROCESSOR_I?86, PROCESSOR_PENTIUM, PROCESSOR_IAMCU, and
1455      PROCESSOR_GENERIC32, f32_patt will be used.
1456      2. For the rest, alt_patt will be used.
1457
1458      When -mtune= isn't used, alt_patt will be used if
1459      cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt/f64_patt will
1460      be used.
1461
1462      When -march= or .arch is used, we can't use anything beyond
1463      cpu_arch_isa_flags.   */
1464
1465   if (fragP->tc_frag_data.code == CODE_16BIT)
1466     {
1467       patt = f16_patt;
1468       max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1469       /* Limit number of NOPs to 2 in 16-bit mode.  */
1470       max_number_of_nops = 2;
1471     }
1472   else
1473     {
1474       patt = fragP->tc_frag_data.code == CODE_64BIT ? f64_patt : f32_patt;
1475       if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1476         {
1477           /* PROCESSOR_UNKNOWN means that all ISAs may be used, unless
1478              explicitly disabled.  */
1479           switch (fragP->tc_frag_data.tune)
1480             {
1481             case PROCESSOR_UNKNOWN:
1482               /* We use cpu_arch_isa_flags to check if we SHOULD
1483                  optimize with nops.  */
1484               if (fragP->tc_frag_data.isanop)
1485                 patt = alt_patt;
1486               break;
1487
1488             case PROCESSOR_CORE:
1489             case PROCESSOR_CORE2:
1490             case PROCESSOR_COREI7:
1491               if (fragP->tc_frag_data.cpunop)
1492                 {
1493                   if (fragP->tc_frag_data.code == CODE_64BIT)
1494                     patt = alt64_patt;
1495                   else
1496                     patt = alt_patt;
1497                 }
1498               break;
1499
1500             case PROCESSOR_PENTIUMPRO:
1501             case PROCESSOR_PENTIUM4:
1502             case PROCESSOR_NOCONA:
1503             case PROCESSOR_GENERIC64:
1504             case PROCESSOR_K6:
1505             case PROCESSOR_ATHLON:
1506             case PROCESSOR_K8:
1507             case PROCESSOR_AMDFAM10:
1508             case PROCESSOR_BD:
1509             case PROCESSOR_ZNVER:
1510             case PROCESSOR_BT:
1511               if (fragP->tc_frag_data.cpunop)
1512                 patt = alt_patt;
1513               break;
1514
1515             case PROCESSOR_I386:
1516             case PROCESSOR_I486:
1517             case PROCESSOR_PENTIUM:
1518             case PROCESSOR_I686:
1519             case PROCESSOR_IAMCU:
1520             case PROCESSOR_GENERIC32:
1521               break;
1522             case PROCESSOR_NONE:
1523               abort ();
1524             }
1525         }
1526       else
1527         {
1528           switch (fragP->tc_frag_data.tune)
1529             {
1530             case PROCESSOR_UNKNOWN:
1531               /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1532                  PROCESSOR_UNKNOWN.  */
1533               abort ();
1534               break;
1535
1536             default:
1537               /* We use cpu_arch_isa_flags to check if we CAN optimize
1538                  with nops.  */
1539               if (fragP->tc_frag_data.isanop)
1540                 patt = alt_patt;
1541               break;
1542
1543             case PROCESSOR_NONE:
1544               abort ();
1545             }
1546         }
1547
1548       if (patt != alt_patt && patt != alt64_patt)
1549         {
1550           max_single_nop_size = patt == f32_patt ? ARRAY_SIZE (f32_patt)
1551                                                  : ARRAY_SIZE (f64_patt);
1552           /* Limit number of NOPs to 2 for older processors.  */
1553           max_number_of_nops = 2;
1554         }
1555       else
1556         {
1557           max_single_nop_size = patt == alt_patt
1558                                 ? ARRAY_SIZE (alt_patt)
1559                                 : ARRAY_SIZE (alt64_patt);
1560           /* Limit number of NOPs to 7 for newer processors.  */
1561           max_number_of_nops = 7;
1562         }
1563     }
1564
1565   if (limit == 0)
1566     limit = max_single_nop_size;
1567
1568   if (fragP->fr_type == rs_fill_nop)
1569     {
1570       /* Output NOPs for .nop directive.  */
1571       if (limit > max_single_nop_size)
1572         {
1573           as_bad_where (fragP->fr_file, fragP->fr_line,
1574                         _("invalid single nop size: %d "
1575                           "(expect within [0, %d])"),
1576                         limit, max_single_nop_size);
1577           return;
1578         }
1579     }
1580   else if (fragP->fr_type != rs_machine_dependent)
1581     fragP->fr_var = count;
1582
1583   /* Emit a plain NOP first when the last thing we saw may not have been
1584      a proper instruction (e.g. a stand-alone prefix or .byte).  */
1585   if (!fragP->tc_frag_data.last_insn_normal)
1586     {
1587       *where++ = 0x90;
1588       --count;
1589     }
1590
1591   if ((count / max_single_nop_size) > max_number_of_nops)
1592     {
1593       /* Generate jump over NOPs.  */
1594       offsetT disp = count - 2;
1595       if (fits_in_imm7 (disp))
1596         {
1597           /* Use "jmp disp8" if possible.  */
1598           count = disp;
1599           where[0] = jump_disp8[0];
1600           where[1] = count;
1601           where += 2;
1602         }
1603       else
1604         {
1605           unsigned int size_of_jump;
1606
1607           if (flag_code == CODE_16BIT)
1608             {
1609               where[0] = jump16_disp32[0];
1610               where[1] = jump16_disp32[1];
1611               size_of_jump = 2;
1612             }
1613           else
1614             {
1615               where[0] = jump32_disp32[0];
1616               size_of_jump = 1;
1617             }
1618
1619           count -= size_of_jump + 4;
1620           if (!fits_in_imm31 (count))
1621             {
1622               as_bad_where (fragP->fr_file, fragP->fr_line,
1623                             _("jump over nop padding out of range"));
1624               return;
1625             }
1626
1627           md_number_to_chars (where + size_of_jump, count, 4);
1628           where += size_of_jump + 4;
1629         }
1630     }
1631
1632   /* Generate multiple NOPs.  */
1633   i386_output_nops (where, patt, count, limit);
1634 }
1635
1636 static INLINE int
1637 operand_type_all_zero (const union i386_operand_type *x)
1638 {
1639   switch (ARRAY_SIZE(x->array))
1640     {
1641     case 3:
1642       if (x->array[2])
1643         return 0;
1644       /* Fall through.  */
1645     case 2:
1646       if (x->array[1])
1647         return 0;
1648       /* Fall through.  */
1649     case 1:
1650       return !x->array[0];
1651     default:
1652       abort ();
1653     }
1654 }
1655
1656 static INLINE void
1657 operand_type_set (union i386_operand_type *x, unsigned int v)
1658 {
1659   switch (ARRAY_SIZE(x->array))
1660     {
1661     case 3:
1662       x->array[2] = v;
1663       /* Fall through.  */
1664     case 2:
1665       x->array[1] = v;
1666       /* Fall through.  */
1667     case 1:
1668       x->array[0] = v;
1669       /* Fall through.  */
1670       break;
1671     default:
1672       abort ();
1673     }
1674
1675   x->bitfield.class = ClassNone;
1676   x->bitfield.instance = InstanceNone;
1677 }
1678
1679 static INLINE int
1680 operand_type_equal (const union i386_operand_type *x,
1681                     const union i386_operand_type *y)
1682 {
1683   switch (ARRAY_SIZE(x->array))
1684     {
1685     case 3:
1686       if (x->array[2] != y->array[2])
1687         return 0;
1688       /* Fall through.  */
1689     case 2:
1690       if (x->array[1] != y->array[1])
1691         return 0;
1692       /* Fall through.  */
1693     case 1:
1694       return x->array[0] == y->array[0];
1695       break;
1696     default:
1697       abort ();
1698     }
1699 }
1700
1701 static INLINE bool
1702 _is_cpu (const i386_cpu_attr *a, enum i386_cpu cpu)
1703 {
1704   switch (cpu)
1705     {
1706     case Cpu287:      return a->bitfield.cpu287;
1707     case Cpu387:      return a->bitfield.cpu387;
1708     case Cpu3dnow:    return a->bitfield.cpu3dnow;
1709     case Cpu3dnowA:   return a->bitfield.cpu3dnowa;
1710     case CpuAVX:      return a->bitfield.cpuavx;
1711     case CpuHLE:      return a->bitfield.cpuhle;
1712     case CpuAVX512F:  return a->bitfield.cpuavx512f;
1713     case CpuAVX512VL: return a->bitfield.cpuavx512vl;
1714     case CpuAPX_F:    return a->bitfield.cpuapx_f;
1715     case Cpu64:       return a->bitfield.cpu64;
1716     case CpuNo64:     return a->bitfield.cpuno64;
1717     default:
1718       gas_assert (cpu < CpuAttrEnums);
1719     }
1720   return a->bitfield.isa == cpu + 1u;
1721 }
1722
1723 static INLINE bool
1724 is_cpu (const insn_template *t, enum i386_cpu cpu)
1725 {
1726   return _is_cpu(&t->cpu, cpu);
1727 }
1728
1729 static INLINE bool
1730 maybe_cpu (const insn_template *t, enum i386_cpu cpu)
1731 {
1732   return _is_cpu(&t->cpu_any, cpu);
1733 }
1734
/* Expand the compact attribute set A into a full i386_cpu_flags value
   and return it.  Only a single-element A.array is supported; any other
   layout leads to abort ().  */
static i386_cpu_flags cpu_flags_from_attr (i386_cpu_attr a)
{
  /* Number of bits in one array element.  */
  const unsigned int bps = sizeof (a.array[0]) * CHAR_BIT;
  /* Start from all-zero flags: elements without an explicit initializer
     are zero-initialized as well.  */
  i386_cpu_flags f = { .array[0] = 0 };

  switch (ARRAY_SIZE (a.array))
    {
    case 1:
      /* Drop the CpuIsaBits bits holding the isa field and place the
         remaining attribute bits into F starting at bit number
         CpuAttrEnums.  NOTE(review): the WORDS_BIGENDIAN variants
         mirror the shift directions, presumably because bitfields are
         allocated from the other end there -- confirm.  */
      f.array[CpuAttrEnums / bps]
#ifndef WORDS_BIGENDIAN
        |= (a.array[0] >> CpuIsaBits) << (CpuAttrEnums % bps);
#else
        |= (a.array[0] << CpuIsaBits) >> (CpuAttrEnums % bps);
#endif
      /* If the flags extend into the following element, the bits
         shifted out above belong there.  */
      if (CpuMax / bps > CpuAttrEnums / bps)
        f.array[CpuAttrEnums / bps + 1]
#ifndef WORDS_BIGENDIAN
          = (a.array[0] >> CpuIsaBits) >> (bps - CpuAttrEnums % bps);
#else
          = (a.array[0] << CpuIsaBits) << (bps - CpuAttrEnums % bps);
#endif
      break;

    default:
      abort ();
    }

  /* A non-zero isa field holds a feature number plus one; set the flag
     bit for that feature.  */
  if (a.bitfield.isa)
#ifndef WORDS_BIGENDIAN
    f.array[(a.bitfield.isa - 1) / bps] |= 1u << ((a.bitfield.isa - 1) % bps);
#else
    f.array[(a.bitfield.isa - 1) / bps] |= 1u << (~(a.bitfield.isa - 1) % bps);
#endif

  return f;
}
1771
1772 static INLINE int
1773 cpu_flags_all_zero (const union i386_cpu_flags *x)
1774 {
1775   switch (ARRAY_SIZE(x->array))
1776     {
1777     case 5:
1778       if (x->array[4])
1779         return 0;
1780       /* Fall through.  */
1781     case 4:
1782       if (x->array[3])
1783         return 0;
1784       /* Fall through.  */
1785     case 3:
1786       if (x->array[2])
1787         return 0;
1788       /* Fall through.  */
1789     case 2:
1790       if (x->array[1])
1791         return 0;
1792       /* Fall through.  */
1793     case 1:
1794       return !x->array[0];
1795     default:
1796       abort ();
1797     }
1798 }
1799
1800 static INLINE int
1801 cpu_flags_equal (const union i386_cpu_flags *x,
1802                  const union i386_cpu_flags *y)
1803 {
1804   switch (ARRAY_SIZE(x->array))
1805     {
1806     case 5:
1807       if (x->array[4] != y->array[4])
1808         return 0;
1809       /* Fall through.  */
1810     case 4:
1811       if (x->array[3] != y->array[3])
1812         return 0;
1813       /* Fall through.  */
1814     case 3:
1815       if (x->array[2] != y->array[2])
1816         return 0;
1817       /* Fall through.  */
1818     case 2:
1819       if (x->array[1] != y->array[1])
1820         return 0;
1821       /* Fall through.  */
1822     case 1:
1823       return x->array[0] == y->array[0];
1824       break;
1825     default:
1826       abort ();
1827     }
1828 }
1829
1830 static INLINE int
1831 cpu_flags_check_cpu64 (const insn_template *t)
1832 {
1833   return flag_code == CODE_64BIT
1834          ? !t->cpu.bitfield.cpuno64
1835          : !t->cpu.bitfield.cpu64;
1836 }
1837
1838 static INLINE i386_cpu_flags
1839 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1840 {
1841   switch (ARRAY_SIZE (x.array))
1842     {
1843     case 5:
1844       x.array [4] &= y.array [4];
1845       /* Fall through.  */
1846     case 4:
1847       x.array [3] &= y.array [3];
1848       /* Fall through.  */
1849     case 3:
1850       x.array [2] &= y.array [2];
1851       /* Fall through.  */
1852     case 2:
1853       x.array [1] &= y.array [1];
1854       /* Fall through.  */
1855     case 1:
1856       x.array [0] &= y.array [0];
1857       break;
1858     default:
1859       abort ();
1860     }
1861   return x;
1862 }
1863
1864 static INLINE i386_cpu_flags
1865 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1866 {
1867   switch (ARRAY_SIZE (x.array))
1868     {
1869     case 5:
1870       x.array [4] |= y.array [4];
1871       /* Fall through.  */
1872     case 4:
1873       x.array [3] |= y.array [3];
1874       /* Fall through.  */
1875     case 3:
1876       x.array [2] |= y.array [2];
1877       /* Fall through.  */
1878     case 2:
1879       x.array [1] |= y.array [1];
1880       /* Fall through.  */
1881     case 1:
1882       x.array [0] |= y.array [0];
1883       break;
1884     default:
1885       abort ();
1886     }
1887   return x;
1888 }
1889
1890 static INLINE i386_cpu_flags
1891 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1892 {
1893   switch (ARRAY_SIZE (x.array))
1894     {
1895     case 5:
1896       x.array [4] &= ~y.array [4];
1897       /* Fall through.  */
1898     case 4:
1899       x.array [3] &= ~y.array [3];
1900       /* Fall through.  */
1901     case 3:
1902       x.array [2] &= ~y.array [2];
1903       /* Fall through.  */
1904     case 2:
1905       x.array [1] &= ~y.array [1];
1906       /* Fall through.  */
1907     case 1:
1908       x.array [0] &= ~y.array [0];
1909       break;
1910     default:
1911       abort ();
1912     }
1913   return x;
1914 }
1915
/* CPU flag set initialized from CPU_ANY_AVX512F_FLAGS.
   NOTE(review): presumably AVX512F plus everything depending on it -
   confirm against the generated opcodes/i386-init.h.  */
static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
1917
1918 static INLINE bool need_evex_encoding (const insn_template *t)
1919 {
1920   return i.encoding == encoding_evex
1921         || i.encoding == encoding_evex512
1922         || (t->opcode_modifier.vex && i.encoding == encoding_egpr)
1923         || i.mask.reg;
1924 }
1925
/* Bits of the value returned by cpu_flags_match ().  */
#define CPU_FLAGS_ARCH_MATCH            0x1
#define CPU_FLAGS_64BIT_MATCH           0x2

/* Both of the above bits set.  */
#define CPU_FLAGS_PERFECT_MATCH \
  (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)

/* Return CPU flags match bits for template T.

   CPU_FLAGS_64BIT_MATCH is set when cpu_flags_check_cpu64 () accepts T for
   the current code size.  CPU_FLAGS_ARCH_MATCH is set when the active CPU
   features satisfy T: every feature in T->cpu has to be active and, unless
   T->cpu_any is empty (possibly only after the massaging below), at least
   one feature in T->cpu_any has to be active.  */

static int
cpu_flags_match (const insn_template *t)
{
  /* ALL: features required together.  ANY: features of which one
     suffices.  */
  i386_cpu_flags cpu, active, all = cpu_flags_from_attr (t->cpu);
  i386_cpu_flags any = cpu_flags_from_attr (t->cpu_any);
  int match = cpu_flags_check_cpu64 (t) ? CPU_FLAGS_64BIT_MATCH : 0;

  /* Cpu64 / CpuNo64 were dealt with above; they must not take part in the
     feature comparison.  */
  all.bitfield.cpu64 = 0;
  all.bitfield.cpuno64 = 0;
  gas_assert (!any.bitfield.cpu64);
  gas_assert (!any.bitfield.cpuno64);

  if (cpu_flags_all_zero (&all) && cpu_flags_all_zero (&any))
    {
      /* This instruction is available on all archs.  */
      return match | CPU_FLAGS_ARCH_MATCH;
    }

  /* This instruction is available only on some archs.  */

  /* Dual VEX/EVEX templates may need stripping of one of the flags.  */
  if (t->opcode_modifier.vex && t->opcode_modifier.evex)
    {
      /* Dual AVX/AVX512 templates need to retain AVX512* only if we already
         know that EVEX encoding will be needed.  */
      if ((any.bitfield.cpuavx || any.bitfield.cpuavx2 || any.bitfield.cpufma)
          && (any.bitfield.cpuavx512f || any.bitfield.cpuavx512vl))
        {
          if (need_evex_encoding (t))
            {
              any.bitfield.cpuavx = 0;
              any.bitfield.cpuavx2 = 0;
              any.bitfield.cpufma = 0;
            }
          /* need_evex_encoding(t) isn't reliable before operands were
             parsed.  */
          else if (i.operands)
            {
              any.bitfield.cpuavx512f = 0;
              any.bitfield.cpuavx512vl = 0;
            }
        }

      /* Dual non-APX/APX templates need massaging from what APX_F() in the
         opcode table has produced.  While the direct transformation of the
         incoming cpuid&(cpuid|APX_F) would be to cpuid&(cpuid) / cpuid&(APX_F)
         respectively, it's cheaper to move to just cpuid / cpuid&APX_F
         instead.  */
      if (any.bitfield.cpuapx_f
          && (any.bitfield.cpubmi || any.bitfield.cpubmi2
              || any.bitfield.cpuavx512f || any.bitfield.cpuavx512bw
              || any.bitfield.cpuavx512dq || any.bitfield.cpuamx_tile
              || any.bitfield.cpucmpccxadd || any.bitfield.cpuuser_msr))
        {
          /* These checks (verifying that APX_F() was properly used in the
             opcode table entry) make sure there's no need for an "else" to
             the "if()" below.  */
          gas_assert (!cpu_flags_all_zero (&all));
          cpu = cpu_flags_and (all, any);
          gas_assert (cpu_flags_equal (&cpu, &all));

          /* With EVEX encoding everything in ANY (including APX_F) becomes
             mandatory; otherwise ALL stays as is.  */
          if (need_evex_encoding (t))
            all = any;

          memset (&any, 0, sizeof (any));
        }
    }

  /* Features tied to 64-bit mode don't count as active outside of it.  */
  if (flag_code != CODE_64BIT)
    active = cpu_flags_and_not (cpu_arch_flags, cpu_64_flags);
  else
    active = cpu_arch_flags;
  /* Every feature in ALL has to be active ...  */
  cpu = cpu_flags_and (all, active);
  if (cpu_flags_equal (&cpu, &all))
    {
      /* AVX and AVX2 present at the same time express an operand size
         dependency - strip AVX2 for the purposes here.  The operand size
         dependent check occurs in check_vecOperands().  */
      if (any.bitfield.cpuavx && any.bitfield.cpuavx2)
        any.bitfield.cpuavx2 = 0;

      /* ... and, unless ANY is empty, so has at least one feature in it.  */
      cpu = cpu_flags_and (any, active);
      if (cpu_flags_all_zero (&any) || !cpu_flags_all_zero (&cpu))
        match |= CPU_FLAGS_ARCH_MATCH;
    }
  return match;
}
2021
2022 static INLINE i386_operand_type
2023 operand_type_and (i386_operand_type x, i386_operand_type y)
2024 {
2025   if (x.bitfield.class != y.bitfield.class)
2026     x.bitfield.class = ClassNone;
2027   if (x.bitfield.instance != y.bitfield.instance)
2028     x.bitfield.instance = InstanceNone;
2029
2030   switch (ARRAY_SIZE (x.array))
2031     {
2032     case 3:
2033       x.array [2] &= y.array [2];
2034       /* Fall through.  */
2035     case 2:
2036       x.array [1] &= y.array [1];
2037       /* Fall through.  */
2038     case 1:
2039       x.array [0] &= y.array [0];
2040       break;
2041     default:
2042       abort ();
2043     }
2044   return x;
2045 }
2046
2047 static INLINE i386_operand_type
2048 operand_type_and_not (i386_operand_type x, i386_operand_type y)
2049 {
2050   gas_assert (y.bitfield.class == ClassNone);
2051   gas_assert (y.bitfield.instance == InstanceNone);
2052
2053   switch (ARRAY_SIZE (x.array))
2054     {
2055     case 3:
2056       x.array [2] &= ~y.array [2];
2057       /* Fall through.  */
2058     case 2:
2059       x.array [1] &= ~y.array [1];
2060       /* Fall through.  */
2061     case 1:
2062       x.array [0] &= ~y.array [0];
2063       break;
2064     default:
2065       abort ();
2066     }
2067   return x;
2068 }
2069
2070 static INLINE i386_operand_type
2071 operand_type_or (i386_operand_type x, i386_operand_type y)
2072 {
2073   gas_assert (x.bitfield.class == ClassNone ||
2074               y.bitfield.class == ClassNone ||
2075               x.bitfield.class == y.bitfield.class);
2076   gas_assert (x.bitfield.instance == InstanceNone ||
2077               y.bitfield.instance == InstanceNone ||
2078               x.bitfield.instance == y.bitfield.instance);
2079
2080   switch (ARRAY_SIZE (x.array))
2081     {
2082     case 3:
2083       x.array [2] |= y.array [2];
2084       /* Fall through.  */
2085     case 2:
2086       x.array [1] |= y.array [1];
2087       /* Fall through.  */
2088     case 1:
2089       x.array [0] |= y.array [0];
2090       break;
2091     default:
2092       abort ();
2093     }
2094   return x;
2095 }
2096
2097 static INLINE i386_operand_type
2098 operand_type_xor (i386_operand_type x, i386_operand_type y)
2099 {
2100   gas_assert (y.bitfield.class == ClassNone);
2101   gas_assert (y.bitfield.instance == InstanceNone);
2102
2103   switch (ARRAY_SIZE (x.array))
2104     {
2105     case 3:
2106       x.array [2] ^= y.array [2];
2107       /* Fall through.  */
2108     case 2:
2109       x.array [1] ^= y.array [1];
2110       /* Fall through.  */
2111     case 1:
2112       x.array [0] ^= y.array [0];
2113       break;
2114     default:
2115       abort ();
2116     }
2117   return x;
2118 }
2119
/* Operand type with all four displacement size bits (disp8, disp16,
   disp32 and disp64) set.  */
static const i386_operand_type anydisp = {
  .bitfield = { .disp8 = 1, .disp16 = 1, .disp32 = 1, .disp64 = 1 }
};

/* Operand categories which operand_type_check () can test for.  */
enum operand_type
{
  /* Operand of class Reg.  */
  reg,
  /* Operand with any immediate size bit set.  */
  imm,
  /* Operand with any displacement size bit set.  */
  disp,
  /* Operand with any displacement size bit or baseindex set.  */
  anymem
};
2131
2132 static INLINE int
2133 operand_type_check (i386_operand_type t, enum operand_type c)
2134 {
2135   switch (c)
2136     {
2137     case reg:
2138       return t.bitfield.class == Reg;
2139
2140     case imm:
2141       return (t.bitfield.imm8
2142               || t.bitfield.imm8s
2143               || t.bitfield.imm16
2144               || t.bitfield.imm32
2145               || t.bitfield.imm32s
2146               || t.bitfield.imm64);
2147
2148     case disp:
2149       return (t.bitfield.disp8
2150               || t.bitfield.disp16
2151               || t.bitfield.disp32
2152               || t.bitfield.disp64);
2153
2154     case anymem:
2155       return (t.bitfield.disp8
2156               || t.bitfield.disp16
2157               || t.bitfield.disp32
2158               || t.bitfield.disp64
2159               || t.bitfield.baseindex);
2160
2161     default:
2162       abort ();
2163     }
2164
2165   return 0;
2166 }
2167
2168 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2169    between operand GIVEN and opeand WANTED for instruction template T.  */
2170
2171 static INLINE int
2172 match_operand_size (const insn_template *t, unsigned int wanted,
2173                     unsigned int given)
2174 {
2175   return !((i.types[given].bitfield.byte
2176             && !t->operand_types[wanted].bitfield.byte)
2177            || (i.types[given].bitfield.word
2178                && !t->operand_types[wanted].bitfield.word)
2179            || (i.types[given].bitfield.dword
2180                && !t->operand_types[wanted].bitfield.dword)
2181            || (i.types[given].bitfield.qword
2182                && (!t->operand_types[wanted].bitfield.qword
2183                    /* Don't allow 64-bit (memory) operands outside of 64-bit
2184                       mode, when they're used where a 64-bit GPR could also
2185                       be used.  Checking is needed for Intel Syntax only.  */
2186                    || (intel_syntax
2187                        && flag_code != CODE_64BIT
2188                        && (t->operand_types[wanted].bitfield.class == Reg
2189                            || t->operand_types[wanted].bitfield.class == Accum
2190                            || t->opcode_modifier.isstring))))
2191            || (i.types[given].bitfield.tbyte
2192                && !t->operand_types[wanted].bitfield.tbyte));
2193 }
2194
2195 /* Return 1 if there is no conflict in SIMD register between operand
2196    GIVEN and opeand WANTED for instruction template T.  */
2197
2198 static INLINE int
2199 match_simd_size (const insn_template *t, unsigned int wanted,
2200                  unsigned int given)
2201 {
2202   return !((i.types[given].bitfield.xmmword
2203             && !t->operand_types[wanted].bitfield.xmmword)
2204            || (i.types[given].bitfield.ymmword
2205                && !t->operand_types[wanted].bitfield.ymmword)
2206            || (i.types[given].bitfield.zmmword
2207                && !t->operand_types[wanted].bitfield.zmmword)
2208            || (i.types[given].bitfield.tmmword
2209                && !t->operand_types[wanted].bitfield.tmmword));
2210 }
2211
2212 /* Return 1 if there is no conflict in any size between operand GIVEN
2213    and opeand WANTED for instruction template T.  */
2214
2215 static INLINE int
2216 match_mem_size (const insn_template *t, unsigned int wanted,
2217                 unsigned int given)
2218 {
2219   return (match_operand_size (t, wanted, given)
2220           && !((i.types[given].bitfield.unspecified
2221                 && !i.broadcast.type
2222                 && !i.broadcast.bytes
2223                 && !t->operand_types[wanted].bitfield.unspecified)
2224                || (i.types[given].bitfield.fword
2225                    && !t->operand_types[wanted].bitfield.fword)
2226                /* For scalar opcode templates to allow register and memory
2227                   operands at the same time, some special casing is needed
2228                   here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2229                   down-conversion vpmov*.  */
2230                || ((t->operand_types[wanted].bitfield.class == RegSIMD
2231                     && t->operand_types[wanted].bitfield.byte
2232                        + t->operand_types[wanted].bitfield.word
2233                        + t->operand_types[wanted].bitfield.dword
2234                        + t->operand_types[wanted].bitfield.qword
2235                        > !!t->opcode_modifier.broadcast)
2236                    ? (i.types[given].bitfield.xmmword
2237                       || i.types[given].bitfield.ymmword
2238                       || i.types[given].bitfield.zmmword)
2239                    : !match_simd_size(t, wanted, given))));
2240 }
2241
2242 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2243    operands for instruction template T, and it has MATCH_REVERSE set if there
2244    is no size conflict on any operands for the template with operands reversed
2245    (and the template allows for reversing in the first place).  */
2246
2247 #define MATCH_STRAIGHT 1
2248 #define MATCH_REVERSE  2
2249
2250 static INLINE unsigned int
2251 operand_size_match (const insn_template *t)
2252 {
2253   unsigned int j, match = MATCH_STRAIGHT;
2254
2255   /* Don't check non-absolute jump instructions.  */
2256   if (t->opcode_modifier.jump
2257       && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2258     return match;
2259
2260   /* Check memory and accumulator operand size.  */
2261   for (j = 0; j < i.operands; j++)
2262     {
2263       if (i.types[j].bitfield.class != Reg
2264           && i.types[j].bitfield.class != RegSIMD
2265           && t->opcode_modifier.operandconstraint == ANY_SIZE)
2266         continue;
2267
2268       if (t->operand_types[j].bitfield.class == Reg
2269           && !match_operand_size (t, j, j))
2270         {
2271           match = 0;
2272           break;
2273         }
2274
2275       if (t->operand_types[j].bitfield.class == RegSIMD
2276           && !match_simd_size (t, j, j))
2277         {
2278           match = 0;
2279           break;
2280         }
2281
2282       if (t->operand_types[j].bitfield.instance == Accum
2283           && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2284         {
2285           match = 0;
2286           break;
2287         }
2288
2289       if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2290         {
2291           match = 0;
2292           break;
2293         }
2294     }
2295
2296   if (!t->opcode_modifier.d)
2297     return match;
2298
2299   /* Check reverse.  */
2300   gas_assert (i.operands >= 2);
2301
2302   for (j = 0; j < i.operands; j++)
2303     {
2304       unsigned int given = i.operands - j - 1;
2305
2306       /* For FMA4 and XOP insns VEX.W controls just the first two
2307          register operands. And APX_F insns just swap the two source operands,
2308          with the 3rd one being the destination.  */
2309       if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP)
2310           || is_cpu (t, CpuAPX_F))
2311         given = j < 2 ? 1 - j : j;
2312
2313       if (t->operand_types[j].bitfield.class == Reg
2314           && !match_operand_size (t, j, given))
2315         return match;
2316
2317       if (t->operand_types[j].bitfield.class == RegSIMD
2318           && !match_simd_size (t, j, given))
2319         return match;
2320
2321       if (t->operand_types[j].bitfield.instance == Accum
2322           && (!match_operand_size (t, j, given)
2323               || !match_simd_size (t, j, given)))
2324         return match;
2325
2326       if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2327         return match;
2328     }
2329
2330   return match | MATCH_REVERSE;
2331 }
2332
2333 static INLINE int
2334 operand_type_match (i386_operand_type overlap,
2335                     i386_operand_type given)
2336 {
2337   i386_operand_type temp = overlap;
2338
2339   temp.bitfield.unspecified = 0;
2340   temp.bitfield.byte = 0;
2341   temp.bitfield.word = 0;
2342   temp.bitfield.dword = 0;
2343   temp.bitfield.fword = 0;
2344   temp.bitfield.qword = 0;
2345   temp.bitfield.tbyte = 0;
2346   temp.bitfield.xmmword = 0;
2347   temp.bitfield.ymmword = 0;
2348   temp.bitfield.zmmword = 0;
2349   temp.bitfield.tmmword = 0;
2350   if (operand_type_all_zero (&temp))
2351     goto mismatch;
2352
2353   if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2354     return 1;
2355
2356  mismatch:
2357   i.error = operand_type_mismatch;
2358   return 0;
2359 }
2360
2361 /* If given types g0 and g1 are registers they must be of the same type
2362    unless the expected operand type register overlap is null.
2363    Intel syntax sized memory operands are also checked here.  */
2364
2365 static INLINE int
2366 operand_type_register_match (i386_operand_type g0,
2367                              i386_operand_type t0,
2368                              i386_operand_type g1,
2369                              i386_operand_type t1)
2370 {
2371   if (g0.bitfield.class != Reg
2372       && g0.bitfield.class != RegSIMD
2373       && (g0.bitfield.unspecified
2374           || !operand_type_check (g0, anymem)))
2375     return 1;
2376
2377   if (g1.bitfield.class != Reg
2378       && g1.bitfield.class != RegSIMD
2379       && (g1.bitfield.unspecified
2380           || !operand_type_check (g1, anymem)))
2381     return 1;
2382
2383   if (g0.bitfield.byte == g1.bitfield.byte
2384       && g0.bitfield.word == g1.bitfield.word
2385       && g0.bitfield.dword == g1.bitfield.dword
2386       && g0.bitfield.qword == g1.bitfield.qword
2387       && g0.bitfield.xmmword == g1.bitfield.xmmword
2388       && g0.bitfield.ymmword == g1.bitfield.ymmword
2389       && g0.bitfield.zmmword == g1.bitfield.zmmword)
2390     return 1;
2391
2392   /* If expectations overlap in no more than a single size, all is fine. */
2393   g0 = operand_type_and (t0, t1);
2394   if (g0.bitfield.byte
2395       + g0.bitfield.word
2396       + g0.bitfield.dword
2397       + g0.bitfield.qword
2398       + g0.bitfield.xmmword
2399       + g0.bitfield.ymmword
2400       + g0.bitfield.zmmword <= 1)
2401     return 1;
2402
2403   i.error = register_type_mismatch;
2404
2405   return 0;
2406 }
2407
2408 static INLINE unsigned int
2409 register_number (const reg_entry *r)
2410 {
2411   unsigned int nr = r->reg_num;
2412
2413   if (r->reg_flags & RegRex)
2414     nr += 8;
2415
2416   if (r->reg_flags & (RegVRex | RegRex2))
2417     nr += 16;
2418
2419   return nr;
2420 }
2421
2422 static INLINE unsigned int
2423 mode_from_disp_size (i386_operand_type t)
2424 {
2425   if (t.bitfield.disp8)
2426     return 1;
2427   else if (t.bitfield.disp16
2428            || t.bitfield.disp32)
2429     return 2;
2430   else
2431     return 0;
2432 }
2433
2434 static INLINE int
2435 fits_in_signed_byte (addressT num)
2436 {
2437   return num + 0x80 <= 0xff;
2438 }
2439
2440 static INLINE int
2441 fits_in_unsigned_byte (addressT num)
2442 {
2443   return num <= 0xff;
2444 }
2445
2446 static INLINE int
2447 fits_in_unsigned_word (addressT num)
2448 {
2449   return num <= 0xffff;
2450 }
2451
2452 static INLINE int
2453 fits_in_signed_word (addressT num)
2454 {
2455   return num + 0x8000 <= 0xffff;
2456 }
2457
2458 static INLINE int
2459 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2460 {
2461 #ifndef BFD64
2462   return 1;
2463 #else
2464   return num + 0x80000000 <= 0xffffffff;
2465 #endif
2466 }                               /* fits_in_signed_long() */
2467
2468 static INLINE int
2469 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2470 {
2471 #ifndef BFD64
2472   return 1;
2473 #else
2474   return num <= 0xffffffff;
2475 #endif
2476 }                               /* fits_in_unsigned_long() */
2477
2478 static INLINE valueT extend_to_32bit_address (addressT num)
2479 {
2480 #ifdef BFD64
2481   if (fits_in_unsigned_long(num))
2482     return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2483
2484   if (!fits_in_signed_long (num))
2485     return num & 0xffffffff;
2486 #endif
2487
2488   return num;
2489 }
2490
2491 static INLINE int
2492 fits_in_disp8 (offsetT num)
2493 {
2494   int shift = i.memshift;
2495   unsigned int mask;
2496
2497   if (shift == -1)
2498     abort ();
2499
2500   mask = (1 << shift) - 1;
2501
2502   /* Return 0 if NUM isn't properly aligned.  */
2503   if ((num & mask))
2504     return 0;
2505
2506   /* Check if NUM will fit in 8bit after shift.  */
2507   return fits_in_signed_byte (num >> shift);
2508 }
2509
2510 static INLINE int
2511 fits_in_imm4 (offsetT num)
2512 {
2513   /* Despite the name, check for imm3 if we're dealing with EVEX.  */
2514   return (num & (i.encoding != encoding_evex
2515                  && i.encoding != encoding_egpr ? 0xf : 7)) == num;
2516 }
2517
2518 static i386_operand_type
2519 smallest_imm_type (offsetT num)
2520 {
2521   i386_operand_type t;
2522
2523   operand_type_set (&t, 0);
2524   t.bitfield.imm64 = 1;
2525
2526   if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2527     {
2528       /* This code is disabled on the 486 because all the Imm1 forms
2529          in the opcode table are slower on the i486.  They're the
2530          versions with the implicitly specified single-position
2531          displacement, which has another syntax if you really want to
2532          use that form.  */
2533       t.bitfield.imm1 = 1;
2534       t.bitfield.imm8 = 1;
2535       t.bitfield.imm8s = 1;
2536       t.bitfield.imm16 = 1;
2537       t.bitfield.imm32 = 1;
2538       t.bitfield.imm32s = 1;
2539     }
2540   else if (fits_in_signed_byte (num))
2541     {
2542       if (fits_in_unsigned_byte (num))
2543         t.bitfield.imm8 = 1;
2544       t.bitfield.imm8s = 1;
2545       t.bitfield.imm16 = 1;
2546       if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
2547         t.bitfield.imm32 = 1;
2548       t.bitfield.imm32s = 1;
2549     }
2550   else if (fits_in_unsigned_byte (num))
2551     {
2552       t.bitfield.imm8 = 1;
2553       t.bitfield.imm16 = 1;
2554       t.bitfield.imm32 = 1;
2555       t.bitfield.imm32s = 1;
2556     }
2557   else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2558     {
2559       t.bitfield.imm16 = 1;
2560       if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
2561         t.bitfield.imm32 = 1;
2562       t.bitfield.imm32s = 1;
2563     }
2564   else if (fits_in_signed_long (num))
2565     {
2566       if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
2567         t.bitfield.imm32 = 1;
2568       t.bitfield.imm32s = 1;
2569     }
2570   else if (fits_in_unsigned_long (num))
2571     t.bitfield.imm32 = 1;
2572
2573   return t;
2574 }
2575
2576 static offsetT
2577 offset_in_range (offsetT val, int size)
2578 {
2579   addressT mask;
2580
2581   switch (size)
2582     {
2583     case 1: mask = ((addressT) 1 <<  8) - 1; break;
2584     case 2: mask = ((addressT) 1 << 16) - 1; break;
2585 #ifdef BFD64
2586     case 4: mask = ((addressT) 1 << 32) - 1; break;
2587 #endif
2588     case sizeof (val): return val;
2589     default: abort ();
2590     }
2591
2592   if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2593     as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2594              (uint64_t) val, (uint64_t) (val & mask));
2595
2596   return val & mask;
2597 }
2598
2599 static INLINE const char *insn_name (const insn_template *t)
2600 {
2601   return &i386_mnemonics[t->mnem_off];
2602 }
2603
/* Result codes of add_prefix ().  */
enum PREFIX_GROUP
{
  /* A prefix of the same class was already present; nothing was added.  */
  PREFIX_EXIST = 0,
  /* A lock prefix was added.  */
  PREFIX_LOCK,
  /* A rep/repne prefix was added.  */
  PREFIX_REP,
  /* A ds segment prefix was added.  */
  PREFIX_DS,
  /* Some other prefix was added.  */
  PREFIX_OTHER
};
2612
2613 /* Returns
2614    a. PREFIX_EXIST if attempting to add a prefix where one from the
2615    same class already exists.
2616    b. PREFIX_LOCK if lock prefix is added.
2617    c. PREFIX_REP if rep/repne prefix is added.
2618    d. PREFIX_DS if ds prefix is added.
2619    e. PREFIX_OTHER if other prefix is added.
2620  */
2621
2622 static enum PREFIX_GROUP
2623 add_prefix (unsigned int prefix)
2624 {
2625   enum PREFIX_GROUP ret = PREFIX_OTHER;
2626   unsigned int q;
2627
2628   if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2629       && flag_code == CODE_64BIT)
2630     {
2631       if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2632           || (i.prefix[REX_PREFIX] & prefix & REX_R)
2633           || (i.prefix[REX_PREFIX] & prefix & REX_X)
2634           || (i.prefix[REX_PREFIX] & prefix & REX_B))
2635         ret = PREFIX_EXIST;
2636       q = REX_PREFIX;
2637     }
2638   else
2639     {
2640       switch (prefix)
2641         {
2642         default:
2643           abort ();
2644
2645         case DS_PREFIX_OPCODE:
2646           ret = PREFIX_DS;
2647           /* Fall through.  */
2648         case CS_PREFIX_OPCODE:
2649         case ES_PREFIX_OPCODE:
2650         case FS_PREFIX_OPCODE:
2651         case GS_PREFIX_OPCODE:
2652         case SS_PREFIX_OPCODE:
2653           q = SEG_PREFIX;
2654           break;
2655
2656         case REPNE_PREFIX_OPCODE:
2657         case REPE_PREFIX_OPCODE:
2658           q = REP_PREFIX;
2659           ret = PREFIX_REP;
2660           break;
2661
2662         case LOCK_PREFIX_OPCODE:
2663           q = LOCK_PREFIX;
2664           ret = PREFIX_LOCK;
2665           break;
2666
2667         case FWAIT_OPCODE:
2668           q = WAIT_PREFIX;
2669           break;
2670
2671         case ADDR_PREFIX_OPCODE:
2672           q = ADDR_PREFIX;
2673           break;
2674
2675         case DATA_PREFIX_OPCODE:
2676           q = DATA_PREFIX;
2677           break;
2678         }
2679       if (i.prefix[q] != 0)
2680         ret = PREFIX_EXIST;
2681     }
2682
2683   if (ret)
2684     {
2685       if (!i.prefix[q])
2686         ++i.prefixes;
2687       i.prefix[q] |= prefix;
2688     }
2689   else
2690     as_bad (_("same type of prefix used twice"));
2691
2692   return ret;
2693 }
2694
2695 static void
2696 update_code_flag (int value, int check)
2697 {
2698   PRINTF_LIKE ((*as_error)) = check ? as_fatal : as_bad;
2699
2700   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpu64 )
2701     {
2702       as_error (_("64bit mode not supported on `%s'."),
2703                 cpu_arch_name ? cpu_arch_name : default_arch);
2704       return;
2705     }
2706
2707   if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2708     {
2709       as_error (_("32bit mode not supported on `%s'."),
2710                 cpu_arch_name ? cpu_arch_name : default_arch);
2711       return;
2712     }
2713
2714   flag_code = (enum flag_code) value;
2715
2716   stackop_size = '\0';
2717 }
2718
/* Switch the code size to VALUE via update_code_flag (), with failures
   reported as ordinary (non-fatal) errors.  */

static void
set_code_flag (int value)
{
  const int fatal = 0;

  update_code_flag (value, fatal);
}
2724
2725 static void
2726 set_16bit_gcc_code_flag (int new_code_flag)
2727 {
2728   flag_code = (enum flag_code) new_code_flag;
2729   if (flag_code != CODE_16BIT)
2730     abort ();
2731   stackop_size = LONG_MNEM_SUFFIX;
2732 }
2733
2734 static void
2735 _set_intel_syntax (int syntax_flag)
2736 {
2737   intel_syntax = syntax_flag;
2738
2739   expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2740
2741   register_prefix = allow_naked_reg ? "" : "%";
2742 }
2743
2744 static void
2745 set_intel_syntax (int syntax_flag)
2746 {
2747   /* Find out if register prefixing is specified.  */
2748   int ask_naked_reg = 0;
2749
2750   SKIP_WHITESPACE ();
2751   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2752     {
2753       char *string;
2754       int e = get_symbol_name (&string);
2755
2756       if (strcmp (string, "prefix") == 0)
2757         ask_naked_reg = 1;
2758       else if (strcmp (string, "noprefix") == 0)
2759         ask_naked_reg = -1;
2760       else
2761         as_bad (_("bad argument to syntax directive."));
2762       (void) restore_line_pointer (e);
2763     }
2764   demand_empty_rest_of_line ();
2765
2766   if (ask_naked_reg == 0)
2767     allow_naked_reg = (syntax_flag
2768                        && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2769   else
2770     allow_naked_reg = (ask_naked_reg < 0);
2771
2772   _set_intel_syntax (syntax_flag);
2773 }
2774
/* Record MNEMONIC_FLAG in the non-local intel_mnemonic setting.
   NOTE(review): presumably a pseudo-op handler selecting Intel vs AT&T
   mnemonics - confirm against the pseudo-op table.  */
static void
set_intel_mnemonic (int mnemonic_flag)
{
  intel_mnemonic = mnemonic_flag;
}
2780
/* Record FLAG in the non-local allow_index_reg setting.
   NOTE(review): presumably a pseudo-op handler - confirm against the
   pseudo-op table.  */
static void
set_allow_index_reg (int flag)
{
  allow_index_reg = flag;
}
2786
2787 static void
2788 set_check (int what)
2789 {
2790   enum check_kind *kind;
2791   const char *str;
2792
2793   if (what)
2794     {
2795       kind = &operand_check;
2796       str = "operand";
2797     }
2798   else
2799     {
2800       kind = &sse_check;
2801       str = "sse";
2802     }
2803
2804   SKIP_WHITESPACE ();
2805
2806   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2807     {
2808       char *string;
2809       int e = get_symbol_name (&string);
2810
2811       if (strcmp (string, "none") == 0)
2812         *kind = check_none;
2813       else if (strcmp (string, "warning") == 0)
2814         *kind = check_warning;
2815       else if (strcmp (string, "error") == 0)
2816         *kind = check_error;
2817       else
2818         as_bad (_("bad argument to %s_check directive."), str);
2819       (void) restore_line_pointer (e);
2820     }
2821   else
2822     as_bad (_("missing argument for %s_check directive"), str);
2823
2824   demand_empty_rest_of_line ();
2825 }
2826
2827 static void
2828 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2829                            i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2830 {
2831 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2832   static const char *arch;
2833
2834   /* Intel MCU is only supported on ELF.  */
2835   if (!IS_ELF)
2836     return;
2837
2838   if (!arch)
2839     {
2840       /* Use cpu_arch_name if it is set in md_parse_option.  Otherwise
2841          use default_arch.  */
2842       arch = cpu_arch_name;
2843       if (!arch)
2844         arch = default_arch;
2845     }
2846
2847   /* If we are targeting Intel MCU, we must enable it.  */
2848   if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2849       == new_flag.bitfield.cpuiamcu)
2850     return;
2851
2852   as_bad (_("`%s' is not supported on `%s'"), name, arch);
2853 #endif
2854 }
2855
2856 static void
2857 extend_cpu_sub_arch_name (const char *pfx, const char *name)
2858 {
2859   if (cpu_sub_arch_name)
2860     cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
2861                                   pfx, name, (const char *) NULL);
2862   else
2863     cpu_sub_arch_name = concat (pfx, name, (const char *) NULL);
2864 }
2865
2866 static void isa_enable (unsigned int idx)
2867 {
2868   i386_cpu_flags flags = cpu_flags_or (cpu_arch_flags, cpu_arch[idx].enable);
2869
2870   if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2871     {
2872       extend_cpu_sub_arch_name (".", cpu_arch[idx].name);
2873       cpu_arch_flags = flags;
2874     }
2875
2876   cpu_arch_isa_flags = cpu_flags_or (cpu_arch_isa_flags, cpu_arch[idx].enable);
2877 }
2878
2879 static void isa_disable (unsigned int idx)
2880 {
2881   i386_cpu_flags flags
2882     = cpu_flags_and_not (cpu_arch_flags, cpu_arch[idx].disable);
2883
2884   if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2885     {
2886       extend_cpu_sub_arch_name (".no", cpu_arch[idx].name);
2887       cpu_arch_flags = flags;
2888     }
2889
2890   cpu_arch_isa_flags
2891     = cpu_flags_and_not (cpu_arch_isa_flags, cpu_arch[idx].disable);
2892 }
2893
/* Parse the operand of an `.arch' directive and update the current
   architecture state accordingly.  Recognized forms, as handled below:

     push              save the current state on a private stack
     pop               restore the most recently pushed state
     default           reset to the default (unknown) architecture
     <name>[,jumps|,nojumps]
                       select base architecture <name>
     .<ext>[/<size>]   enable ISA extension <ext>
     .no<ext>          disable ISA extension <ext>

   Problems are reported through as_bad ().  DUMMY is unused.  */

static void
set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
{
  /* One saved snapshot of the architecture state, linked to the one
     pushed before it.  */
  typedef struct arch_stack_entry
  {
    const struct arch_stack_entry *prev;
    const char *name;
    char *sub_name;
    i386_cpu_flags flags;
    i386_cpu_flags isa_flags;
    enum processor_type isa;
    enum flag_code flag_code;
    unsigned int vector_size;
    char stackop_size;
    bool no_cond_jump_promotion;
  } arch_stack_entry;
  /* Top of the push/pop stack; persists across directive invocations.  */
  static const arch_stack_entry *arch_stack_top;
  char *s;
  int e;
  const char *string;
  /* Index into cpu_arch[]; values at or beyond ARRAY_SIZE (cpu_arch) are
     used as "not found" / "skip lookup" markers further down.  */
  unsigned int j = 0;

  SKIP_WHITESPACE ();

  if (is_end_of_line[(unsigned char) *input_line_pointer])
    {
      as_bad (_("missing cpu architecture"));
      input_line_pointer++;
      return;
    }

  e = get_symbol_name (&s);
  string = s;

  if (strcmp (string, "push") == 0)
    {
      arch_stack_entry *top = XNEW (arch_stack_entry);

      /* Snapshot everything; the sub-arch name is duplicated because the
         live copy gets freed / reallocated by later directives.  */
      top->name = cpu_arch_name;
      if (cpu_sub_arch_name)
        top->sub_name = xstrdup (cpu_sub_arch_name);
      else
        top->sub_name = NULL;
      top->flags = cpu_arch_flags;
      top->isa = cpu_arch_isa;
      top->isa_flags = cpu_arch_isa_flags;
      top->flag_code = flag_code;
      top->vector_size = vector_size;
      top->stackop_size = stackop_size;
      top->no_cond_jump_promotion = no_cond_jump_promotion;

      top->prev = arch_stack_top;
      arch_stack_top = top;

      (void) restore_line_pointer (e);
      demand_empty_rest_of_line ();
      return;
    }

  if (strcmp (string, "pop") == 0)
    {
      const arch_stack_entry *top = arch_stack_top;

      if (!top)
        as_bad (_(".arch stack is empty"));
      /* flag_code and stackop_size are not restored by a pop; instead the
         pop is refused unless they match what was in effect at the push.  */
      else if (top->flag_code != flag_code
               || top->stackop_size != stackop_size)
        {
          static const unsigned int bits[] = {
            [CODE_16BIT] = 16,
            [CODE_32BIT] = 32,
            [CODE_64BIT] = 64,
          };

          as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
                  bits[top->flag_code],
                  top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
        }
      else
        {
          arch_stack_top = top->prev;

          /* Ownership of the saved sub-arch string moves back to
             cpu_sub_arch_name, replacing (and freeing) the current one.  */
          cpu_arch_name = top->name;
          free (cpu_sub_arch_name);
          cpu_sub_arch_name = top->sub_name;
          cpu_arch_flags = top->flags;
          cpu_arch_isa = top->isa;
          cpu_arch_isa_flags = top->isa_flags;
          vector_size = top->vector_size;
          no_cond_jump_promotion = top->no_cond_jump_promotion;

          XDELETE (top);
        }

      (void) restore_line_pointer (e);
      demand_empty_rest_of_line ();
      return;
    }

  if (strcmp (string, "default") == 0)
    {
      /* When the default architecture is "iamcu", treat `default' as that
         name and let the table lookup below handle it.  */
      if (strcmp (default_arch, "iamcu") == 0)
        string = default_arch;
      else
        {
          static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;

          cpu_arch_name = NULL;
          free (cpu_sub_arch_name);
          cpu_sub_arch_name = NULL;
          cpu_arch_flags = cpu_unknown_flags;
          cpu_arch_isa = PROCESSOR_UNKNOWN;
          /* Entry 0 or 1 of cpu_arch[] is picked depending on whether
             64-bit code is being assembled.  */
          cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
          if (!cpu_arch_tune_set)
            cpu_arch_tune = PROCESSOR_UNKNOWN;

          vector_size = VSZ_DEFAULT;

          /* One past the "not found" value: skips the lookup loop and
             avoids the "no such architecture" diagnostic below, while
             still reaching the optional modifier parsing.  */
          j = ARRAY_SIZE (cpu_arch) + 1;
        }
    }

  for (; j < ARRAY_SIZE (cpu_arch); j++)
    {
      /* Names are compared without a leading '.'; a leading '.' must be
         present exactly for entries of type PROCESSOR_NONE (the ISA
         extensions).  */
      if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
          && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
        {
          if (*string != '.')
            {
              /* Base architecture selection.  */
              check_cpu_arch_compatible (string, cpu_arch[j].enable);

              if (flag_code == CODE_64BIT && !cpu_arch[j].enable.bitfield.cpu64 )
                {
                  as_bad (_("64bit mode not supported on `%s'."),
                          cpu_arch[j].name);
                  (void) restore_line_pointer (e);
                  ignore_rest_of_line ();
                  return;
                }

              if (flag_code == CODE_32BIT && !cpu_arch[j].enable.bitfield.cpui386)
                {
                  as_bad (_("32bit mode not supported on `%s'."),
                          cpu_arch[j].name);
                  (void) restore_line_pointer (e);
                  ignore_rest_of_line ();
                  return;
                }

              cpu_arch_name = cpu_arch[j].name;
              free (cpu_sub_arch_name);
              cpu_sub_arch_name = NULL;
              cpu_arch_flags = cpu_arch[j].enable;
              cpu_arch_isa = cpu_arch[j].type;
              cpu_arch_isa_flags = cpu_arch[j].enable;
              if (!cpu_arch_tune_set)
                cpu_arch_tune = cpu_arch_isa;

              vector_size = VSZ_DEFAULT;

              pre_386_16bit_warned = false;
              /* Leave the loop with j < ARRAY_SIZE (cpu_arch) so that the
                 modifier parsing at the end of the function runs without
                 an error being reported.  */
              break;
            }

          /* An extension entry with nothing to enable is not a match for
             the enabling form; keep searching.  */
          if (cpu_flags_all_zero (&cpu_arch[j].enable))
            continue;

          isa_enable (j);

          (void) restore_line_pointer (e);

          switch (cpu_arch[j].vsz)
            {
            default:
              break;

            case vsz_set:
              /* An optional "/<size>" suffix (also ":<size>" when
                 SVR4_COMMENT_CHARS is defined) selects the vector size.  */
#ifdef SVR4_COMMENT_CHARS
              if (*input_line_pointer == ':' || *input_line_pointer == '/')
#else
              if (*input_line_pointer == '/')
#endif
                {
                  ++input_line_pointer;
                  switch (get_absolute_expression ())
                    {
                    case 512: vector_size = VSZ512; break;
                    case 256: vector_size = VSZ256; break;
                    case 128: vector_size = VSZ128; break;
                    default:
                      as_bad (_("Unrecognized vector size specifier"));
                      ignore_rest_of_line ();
                      return;
                    }
                  break;
                }
                /* No explicit size given: fall through to the default.  */
            case vsz_reset:
              vector_size = VSZ_DEFAULT;
              break;
            }

          demand_empty_rest_of_line ();
          return;
        }
    }

  /* Only tried when the lookup above ran off the end of the table (or was
     skipped).  */
  if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
    {
      /* Disable an ISA extension.  */
      for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
        if (cpu_arch[j].type == PROCESSOR_NONE
            && strcmp (string + 3, cpu_arch[j].name) == 0)
          {
            isa_disable (j);

            if (cpu_arch[j].vsz == vsz_set)
              vector_size = VSZ_DEFAULT;

            (void) restore_line_pointer (e);
            demand_empty_rest_of_line ();
            return;
          }
    }

  /* Exactly ARRAY_SIZE means a search completed without a match; the
     `default' path used ARRAY_SIZE + 1 and is therefore not diagnosed.  */
  if (j == ARRAY_SIZE (cpu_arch))
    as_bad (_("no such architecture: `%s'"), string);

  /* Put back the character returned by get_symbol_name ()
     (NOTE(review): presumably the one it replaced to terminate the
     name - confirm against get_symbol_name).  */
  *input_line_pointer = e;

  /* Optional ",jumps" / ",nojumps" modifier; conditional jump promotion
     is re-enabled unless "nojumps" is given.  */
  no_cond_jump_promotion = 0;
  if (*input_line_pointer == ','
      && !is_end_of_line[(unsigned char) input_line_pointer[1]])
    {
      ++input_line_pointer;
      e = get_symbol_name (&s);
      string = s;

      if (strcmp (string, "nojumps") == 0)
        no_cond_jump_promotion = 1;
      else if (strcmp (string, "jumps") == 0)
        ;
      else
        as_bad (_("no such architecture modifier: `%s'"), string);

      (void) restore_line_pointer (e);
    }

  demand_empty_rest_of_line ();
}
3144
3145 enum bfd_architecture
3146 i386_arch (void)
3147 {
3148   if (cpu_arch_isa == PROCESSOR_IAMCU)
3149     {
3150       if (!IS_ELF || flag_code == CODE_64BIT)
3151         as_fatal (_("Intel MCU is 32bit ELF only"));
3152       return bfd_arch_iamcu;
3153     }
3154   else
3155     return bfd_arch_i386;
3156 }
3157
3158 unsigned long
3159 i386_mach (void)
3160 {
3161   if (startswith (default_arch, "x86_64"))
3162     {
3163       if (default_arch[6] == '\0')
3164         return bfd_mach_x86_64;
3165       else
3166         return bfd_mach_x64_32;
3167     }
3168   else if (!strcmp (default_arch, "i386")
3169            || !strcmp (default_arch, "iamcu"))
3170     {
3171       if (cpu_arch_isa == PROCESSOR_IAMCU)
3172         {
3173           if (!IS_ELF)
3174             as_fatal (_("Intel MCU is 32bit ELF only"));
3175           return bfd_mach_i386_iamcu;
3176         }
3177       else
3178         return bfd_mach_i386_i386;
3179     }
3180   else
3181     as_fatal (_("unknown architecture"));
3182 }
3183 \f
3184 #include "opcodes/i386-tbl.h"
3185
3186 static void
3187 op_lookup (const char *mnemonic)
3188 {
3189    i386_op_off_t *pos = str_hash_find (op_hash, mnemonic);
3190
3191    if (pos != NULL)
3192      {
3193        current_templates.start = &i386_optab[pos[0]];
3194        current_templates.end = &i386_optab[pos[1]];
3195      }
3196    else
3197      current_templates.end = current_templates.start = NULL;
3198 }
3199
/* One-time target initialization: build the opcode and register hash
   tables, remember a few frequently used register entries, fill in the
   lexical classification tables, and choose the DWARF / SFrame
   parameters according to the object format's bitness.  */

void
md_begin (void)
{
  /* Support pseudo prefixes like {disp32}.  */
  lex_type ['{'] = LEX_BEGIN_NAME;

  /* Initialize op_hash hash table.  */
  op_hash = str_htab_create ();

  {
    /* Each i386_op_sets[] element is an offset into i386_optab[]; the hash
       value is a pointer to that element, so that a lookup can also read
       the following element.  The final element is therefore not
       inserted.  */
    const i386_op_off_t *cur = i386_op_sets;
    const i386_op_off_t *end = cur + ARRAY_SIZE (i386_op_sets) - 1;

    for (; cur < end; ++cur)
      if (str_hash_insert (op_hash, insn_name (&i386_optab[*cur]), cur, 0))
        as_fatal (_("duplicate %s"), insn_name (&i386_optab[*cur]));
  }

  /* Initialize reg_hash hash table.  */
  reg_hash = str_htab_create ();
  {
    const reg_entry *regtab;
    unsigned int regtab_size = i386_regtab_size;

    for (regtab = i386_regtab; regtab_size--; regtab++)
      {
        /* While walking the table, cache pointers to a few specific
           registers (dword accumulator, st(0) accumulator, %es/%ss/%ds
           by number, mask register 0) in file-scope variables.  */
        switch (regtab->reg_type.bitfield.class)
          {
          case Reg:
            if (regtab->reg_type.bitfield.dword)
              {
                if (regtab->reg_type.bitfield.instance == Accum)
                  reg_eax = regtab;
              }
            else if (regtab->reg_type.bitfield.tbyte)
              {
                /* There's no point inserting st(<N>) in the hash table, as
                   parentheses aren't included in register_chars[] anyway.  */
                if (regtab->reg_type.bitfield.instance != Accum)
                  continue;
                reg_st0 = regtab;
              }
            break;

          case SReg:
            switch (regtab->reg_num)
              {
              case 0: reg_es = regtab; break;
              case 2: reg_ss = regtab; break;
              case 3: reg_ds = regtab; break;
              }
            break;

          case RegMask:
            if (!regtab->reg_num)
              reg_k0 = regtab;
            break;
          }

        if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
          as_fatal (_("duplicate %s"), regtab->reg_name);
      }
  }

  /* Fill in lexical tables:  mnemonic_chars, operand_chars.  */
  {
    int c;
    const char *p;

    for (c = 0; c < 256; c++)
      {
        if (ISDIGIT (c) || ISLOWER (c))
          {
            mnemonic_chars[c] = c;
            register_chars[c] = c;
            operand_chars[c] = c;
          }
        else if (ISUPPER (c))
          {
            /* Mnemonics and register names are folded to lower case;
               operands keep their case.  */
            mnemonic_chars[c] = TOLOWER (c);
            register_chars[c] = mnemonic_chars[c];
            operand_chars[c] = c;
          }
#ifdef SVR4_COMMENT_CHARS
        else if (c == '\\' && strchr (i386_comment_chars, '/'))
          operand_chars[c] = c;
#endif

        /* All bytes with the top bit set are accepted in operands.  */
        if (c >= 128)
          operand_chars[c] = c;
      }

    mnemonic_chars['_'] = '_';
    mnemonic_chars['-'] = '-';
    mnemonic_chars['.'] = '.';

    for (p = extra_symbol_chars; *p != '\0'; p++)
      operand_chars[(unsigned char) *p] = *p;
    for (p = operand_special_chars; *p != '\0'; p++)
      operand_chars[(unsigned char) *p] = *p;
  }

  /* Unwind-info parameters: return-address column and CIE data alignment
     depend on whether a 64-bit object is being produced.  */
  if (object_64bit)
    {
#if defined (OBJ_COFF) && defined (TE_PE)
      x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
                                  ? 32 : 16);
#else
      x86_dwarf2_return_column = 16;
#endif
      x86_cie_data_alignment = -8;
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      x86_sframe_cfa_sp_reg = REG_SP;
      x86_sframe_cfa_fp_reg = REG_FP;
#endif
    }
  else
    {
      x86_dwarf2_return_column = 8;
      x86_cie_data_alignment = -4;
    }

  /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
     can be turned into BRANCH_PREFIX frag.  */
  if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
    abort ();
}
3327
3328 void
3329 i386_print_statistics (FILE *file)
3330 {
3331   htab_print_statistics (file, "i386 opcode", op_hash);
3332   htab_print_statistics (file, "i386 register", reg_hash);
3333 }
3334
3335 void
3336 i386_md_end (void)
3337 {
3338   htab_delete (op_hash);
3339   htab_delete (reg_hash);
3340 }
3341 \f
3342 #ifdef DEBUG386
3343
3344 /* Debugging routines for md_assemble.  */
3345 static void pte (insn_template *);
3346 static void pt (i386_operand_type);
3347 static void pe (expressionS *);
3348 static void ps (symbolS *);
3349
3350 static void
3351 pi (const char *line, i386_insn *x)
3352 {
3353   unsigned int j;
3354
3355   fprintf (stdout, "%s: template ", line);
3356   pte (&x->tm);
3357   fprintf (stdout, "  address: base %s  index %s  scale %x\n",
3358            x->base_reg ? x->base_reg->reg_name : "none",
3359            x->index_reg ? x->index_reg->reg_name : "none",
3360            x->log2_scale_factor);
3361   fprintf (stdout, "  modrm:  mode %x  reg %x  reg/mem %x\n",
3362            x->rm.mode, x->rm.reg, x->rm.regmem);
3363   fprintf (stdout, "  sib:  base %x  index %x  scale %x\n",
3364            x->sib.base, x->sib.index, x->sib.scale);
3365   fprintf (stdout, "  rex: 64bit %x  extX %x  extY %x  extZ %x\n",
3366            (x->rex & REX_W) != 0,
3367            (x->rex & REX_R) != 0,
3368            (x->rex & REX_X) != 0,
3369            (x->rex & REX_B) != 0);
3370   for (j = 0; j < x->operands; j++)
3371     {
3372       fprintf (stdout, "    #%d:  ", j + 1);
3373       pt (x->types[j]);
3374       fprintf (stdout, "\n");
3375       if (x->types[j].bitfield.class == Reg
3376           || x->types[j].bitfield.class == RegMMX
3377           || x->types[j].bitfield.class == RegSIMD
3378           || x->types[j].bitfield.class == RegMask
3379           || x->types[j].bitfield.class == SReg
3380           || x->types[j].bitfield.class == RegCR
3381           || x->types[j].bitfield.class == RegDR
3382           || x->types[j].bitfield.class == RegTR
3383           || x->types[j].bitfield.class == RegBND)
3384         fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3385       if (operand_type_check (x->types[j], imm))
3386         pe (x->op[j].imms);
3387       if (operand_type_check (x->types[j], disp))
3388         pe (x->op[j].disps);
3389     }
3390 }
3391
3392 static void
3393 pte (insn_template *t)
3394 {
3395   static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3396   static const char *const opc_spc[] = {
3397     NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3398     "XOP08", "XOP09", "XOP0A",
3399   };
3400   unsigned int j;
3401
3402   fprintf (stdout, " %d operands ", t->operands);
3403   if (opc_pfx[t->opcode_modifier.opcodeprefix])
3404     fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3405   if (opc_spc[t->opcode_space])
3406     fprintf (stdout, "space %s ", opc_spc[t->opcode_space]);
3407   fprintf (stdout, "opcode %x ", t->base_opcode);
3408   if (t->extension_opcode != None)
3409     fprintf (stdout, "ext %x ", t->extension_opcode);
3410   if (t->opcode_modifier.d)
3411     fprintf (stdout, "D");
3412   if (t->opcode_modifier.w)
3413     fprintf (stdout, "W");
3414   fprintf (stdout, "\n");
3415   for (j = 0; j < t->operands; j++)
3416     {
3417       fprintf (stdout, "    #%d type ", j + 1);
3418       pt (t->operand_types[j]);
3419       fprintf (stdout, "\n");
3420     }
3421 }
3422
3423 static void
3424 pe (expressionS *e)
3425 {
3426   fprintf (stdout, "    operation     %d\n", e->X_op);
3427   fprintf (stdout, "    add_number    %" PRId64 " (%" PRIx64 ")\n",
3428            (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3429   if (e->X_add_symbol)
3430     {
3431       fprintf (stdout, "    add_symbol    ");
3432       ps (e->X_add_symbol);
3433       fprintf (stdout, "\n");
3434     }
3435   if (e->X_op_symbol)
3436     {
3437       fprintf (stdout, "    op_symbol    ");
3438       ps (e->X_op_symbol);
3439       fprintf (stdout, "\n");
3440     }
3441 }
3442
3443 static void
3444 ps (symbolS *s)
3445 {
3446   fprintf (stdout, "%s type %s%s",
3447            S_GET_NAME (s),
3448            S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3449            segment_name (S_GET_SEGMENT (s)));
3450 }
3451
/* Table used by pt () to turn an operand type into readable text.  Each
   entry pairs a bit pattern (MASK) with the NAME printed when every bit
   of that pattern is present in the operand type being dumped.  */
static struct type_name
  {
    i386_operand_type mask;
    const char *name;
  }
const type_names[] =
{
  { { .bitfield = { .class = Reg, .byte = 1 } }, "r8" },
  { { .bitfield = { .class = Reg, .word = 1 } }, "r16" },
  { { .bitfield = { .class = Reg, .dword = 1 } }, "r32" },
  { { .bitfield = { .class = Reg, .qword = 1 } }, "r64" },
  { { .bitfield = { .instance = Accum, .byte = 1 } }, "acc8" },
  { { .bitfield = { .instance = Accum, .word = 1 } }, "acc16" },
  { { .bitfield = { .instance = Accum, .dword = 1 } }, "acc32" },
  { { .bitfield = { .instance = Accum, .qword = 1 } }, "acc64" },
  { { .bitfield = { .imm8 = 1 } }, "i8" },
  { { .bitfield = { .imm8s = 1 } }, "i8s" },
  { { .bitfield = { .imm16 = 1 } }, "i16" },
  { { .bitfield = { .imm32 = 1 } }, "i32" },
  { { .bitfield = { .imm32s = 1 } }, "i32s" },
  { { .bitfield = { .imm64 = 1 } }, "i64" },
  { { .bitfield = { .imm1 = 1 } }, "i1" },
  { { .bitfield = { .baseindex = 1 } }, "BaseIndex" },
  { { .bitfield = { .disp8 = 1 } }, "d8" },
  { { .bitfield = { .disp16 = 1 } }, "d16" },
  { { .bitfield = { .disp32 = 1 } }, "d32" },
  { { .bitfield = { .disp64 = 1 } }, "d64" },
  { { .bitfield = { .instance = RegD, .word = 1 } }, "InOutPortReg" },
  { { .bitfield = { .instance = RegC, .byte = 1 } }, "ShiftCount" },
  { { .bitfield = { .class = RegCR } }, "control reg" },
  { { .bitfield = { .class = RegTR } }, "test reg" },
  { { .bitfield = { .class = RegDR } }, "debug reg" },
  { { .bitfield = { .class = Reg, .tbyte = 1 } }, "FReg" },
  { { .bitfield = { .instance = Accum, .tbyte = 1 } }, "FAcc" },
  { { .bitfield = { .class = SReg } }, "SReg" },
  { { .bitfield = { .class = RegMMX } }, "rMMX" },
  { { .bitfield = { .class = RegSIMD, .xmmword = 1 } }, "rXMM" },
  { { .bitfield = { .class = RegSIMD, .ymmword = 1 } }, "rYMM" },
  { { .bitfield = { .class = RegSIMD, .zmmword = 1 } }, "rZMM" },
  { { .bitfield = { .class = RegSIMD, .tmmword = 1 } }, "rTMM" },
  { { .bitfield = { .class = RegMask } }, "Mask reg" },
};
3494
3495 static void
3496 pt (i386_operand_type t)
3497 {
3498   unsigned int j;
3499   i386_operand_type a;
3500
3501   for (j = 0; j < ARRAY_SIZE (type_names); j++)
3502     {
3503       a = operand_type_and (t, type_names[j].mask);
3504       if (operand_type_equal (&a, &type_names[j].mask))
3505         fprintf (stdout, "%s, ",  type_names[j].name);
3506     }
3507   fflush (stdout);
3508 }
3509
3510 #endif /* DEBUG386 */
3511 \f
3512 static bfd_reloc_code_real_type
3513 reloc (unsigned int size,
3514        int pcrel,
3515        int sign,
3516        bfd_reloc_code_real_type other)
3517 {
3518   if (other != NO_RELOC)
3519     {
3520       reloc_howto_type *rel;
3521
3522       if (size == 8)
3523         switch (other)
3524           {
3525           case BFD_RELOC_X86_64_GOT32:
3526             return BFD_RELOC_X86_64_GOT64;
3527             break;
3528           case BFD_RELOC_X86_64_GOTPLT64:
3529             return BFD_RELOC_X86_64_GOTPLT64;
3530             break;
3531           case BFD_RELOC_X86_64_PLTOFF64:
3532             return BFD_RELOC_X86_64_PLTOFF64;
3533             break;
3534           case BFD_RELOC_X86_64_GOTPC32:
3535             other = BFD_RELOC_X86_64_GOTPC64;
3536             break;
3537           case BFD_RELOC_X86_64_GOTPCREL:
3538             other = BFD_RELOC_X86_64_GOTPCREL64;
3539             break;
3540           case BFD_RELOC_X86_64_TPOFF32:
3541             other = BFD_RELOC_X86_64_TPOFF64;
3542             break;
3543           case BFD_RELOC_X86_64_DTPOFF32:
3544             other = BFD_RELOC_X86_64_DTPOFF64;
3545             break;
3546           default:
3547             break;
3548           }
3549
3550 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3551       if (other == BFD_RELOC_SIZE32)
3552         {
3553           if (size == 8)
3554             other = BFD_RELOC_SIZE64;
3555           if (pcrel)
3556             {
3557               as_bad (_("there are no pc-relative size relocations"));
3558               return NO_RELOC;
3559             }
3560         }
3561 #endif
3562
3563       /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless.  */
3564       if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3565         sign = -1;
3566
3567       rel = bfd_reloc_type_lookup (stdoutput, other);
3568       if (!rel)
3569         as_bad (_("unknown relocation (%u)"), other);
3570       else if (size != bfd_get_reloc_size (rel))
3571         as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3572                 bfd_get_reloc_size (rel),
3573                 size);
3574       else if (pcrel && !rel->pc_relative)
3575         as_bad (_("non-pc-relative relocation for pc-relative field"));
3576       else if ((rel->complain_on_overflow == complain_overflow_signed
3577                 && !sign)
3578                || (rel->complain_on_overflow == complain_overflow_unsigned
3579                    && sign > 0))
3580         as_bad (_("relocated field and relocation type differ in signedness"));
3581       else
3582         return other;
3583       return NO_RELOC;
3584     }
3585
3586   if (pcrel)
3587     {
3588       if (!sign)
3589         as_bad (_("there are no unsigned pc-relative relocations"));
3590       switch (size)
3591         {
3592         case 1: return BFD_RELOC_8_PCREL;
3593         case 2: return BFD_RELOC_16_PCREL;
3594         case 4: return BFD_RELOC_32_PCREL;
3595         case 8: return BFD_RELOC_64_PCREL;
3596         }
3597       as_bad (_("cannot do %u byte pc-relative relocation"), size);
3598     }
3599   else
3600     {
3601       if (sign > 0)
3602         switch (size)
3603           {
3604           case 4: return BFD_RELOC_X86_64_32S;
3605           }
3606       else
3607         switch (size)
3608           {
3609           case 1: return BFD_RELOC_8;
3610           case 2: return BFD_RELOC_16;
3611           case 4: return BFD_RELOC_32;
3612           case 8: return BFD_RELOC_64;
3613           }
3614       as_bad (_("cannot do %s %u byte relocation"),
3615               sign > 0 ? "signed" : "unsigned", size);
3616     }
3617
3618   return NO_RELOC;
3619 }
3620
3621 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3622 /* Here we decide which fixups can be adjusted to make them relative to
3623    the beginning of the section instead of the symbol.  Basically we need
3624    to make sure that the dynamic relocations are done correctly, so in
3625    some cases we force the original symbol to be used.  */
3626
3627 int
3628 tc_i386_fix_adjustable (fixS *fixP)
3629 {
3630   if (!IS_ELF)
3631     return 1;
3632
3633   /* Don't adjust pc-relative references to merge sections in 64-bit
3634      mode.  */
3635   if (use_rela_relocations
3636       && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3637       && fixP->fx_pcrel)
3638     return 0;
3639
3640   /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3641      and changed later by validate_fix.  */
3642   if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3643       && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3644     return 0;
3645
3646   /* Adjust_reloc_syms doesn't know about the GOT.  Need to keep symbol
3647      for size relocations.  */
3648   if (fixP->fx_r_type == BFD_RELOC_SIZE32
3649       || fixP->fx_r_type == BFD_RELOC_SIZE64
3650       || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3651       || fixP->fx_r_type == BFD_RELOC_386_GOT32
3652       || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3653       || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3654       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3655       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3656       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3657       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3658       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3659       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3660       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3661       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3662       || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3663       || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3664       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3665       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3666       || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3667       || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTPCRELX
3668       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3669       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3670       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3671       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3672       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3673       || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTTPOFF
3674       || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_6_GOTTPOFF
3675       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3676       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3677       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3678       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3679       || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC
3680       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3681       || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3682       || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3683     return 0;
3684   return 1;
3685 }
3686 #endif
3687
3688 static INLINE bool
3689 want_disp32 (const insn_template *t)
3690 {
3691   return flag_code != CODE_64BIT
3692          || i.prefix[ADDR_PREFIX]
3693          || (t->mnem_off == MN_lea
3694              && (!i.types[1].bitfield.qword
3695                 || t->opcode_modifier.size == SIZE32));
3696 }
3697
/* Classify MNEMONIC for Intel syntax floating point operand handling.

   Returns 0 for a non-math mnemonic (anything not starting with 'f', plus
   fxsave/fxrstor), 2 for an integer op (fi*), 3 for fldcw/fldenv, the
   non-waiting control ops (fn* other than fnop), frstor/frstpm, fsave and
   fstcw/fstdw/fstenv/fsts[gw], and 1 for everything else.

   Note that the value returned is meaningful only for opcodes with (memory)
   operands, hence the code here is free to improperly handle opcodes that
   have no operands (for better performance and smaller code).  fclex,
   fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and the fs segment
   override prefix are not currently handled because no call path can make
   opcodes without operands get here.  */

static int
intel_float_operand (const char *mnemonic)
{
  char kind;

  if (*mnemonic != 'f')
    return 0; /* non-math */

  kind = mnemonic[1];

  if (kind == 'i')
    return 2; /* integer op */

  if (kind == 'l')
    {
      /* fldcw/fldenv */
      if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
        return 3;
      return 1;
    }

  if (kind == 'n')
    /* Non-waiting control op, except fnop.  */
    return mnemonic[2] != 'o' ? 3 : 1;

  if (kind == 'r')
    /* frstor/frstpm */
    return mnemonic[2] == 's' ? 3 : 1;

  if (kind == 's')
    {
      if (mnemonic[2] == 'a')
        return 3; /* fsave */

      if (mnemonic[2] == 't')
        {
          const char sel = mnemonic[3];

          /* fstcw, fstdw, fstenv, fsts[gw] */
          if (sel == 'c' || sel == 'd' || sel == 'e' || sel == 's')
            return 3;
        }

      return 1;
    }

  /* fxsave/fxrstor are not really math ops.  */
  if (kind == 'x' && (mnemonic[2] == 'r' || mnemonic[2] == 's'))
    return 0;

  return 1;
}
3750
3751 static INLINE void
3752 install_template (const insn_template *t)
3753 {
3754   unsigned int l;
3755
3756   i.tm = *t;
3757
3758   /* Dual VEX/EVEX templates need stripping one of the possible variants.  */
3759   if (t->opcode_modifier.vex && t->opcode_modifier.evex)
3760     {
3761       if ((maybe_cpu (t, CpuAVX) || maybe_cpu (t, CpuAVX2)
3762            || maybe_cpu (t, CpuFMA))
3763           && (maybe_cpu (t, CpuAVX512F) || maybe_cpu (t, CpuAVX512VL)))
3764         {
3765           if (need_evex_encoding (t))
3766             {
3767               i.tm.opcode_modifier.vex = 0;
3768               i.tm.cpu.bitfield.cpuavx512f = i.tm.cpu_any.bitfield.cpuavx512f;
3769               i.tm.cpu.bitfield.cpuavx512vl = i.tm.cpu_any.bitfield.cpuavx512vl;
3770             }
3771           else
3772             {
3773               i.tm.opcode_modifier.evex = 0;
3774               if (i.tm.cpu_any.bitfield.cpuavx)
3775                 i.tm.cpu.bitfield.cpuavx = 1;
3776               else if (!i.tm.cpu.bitfield.isa)
3777                 i.tm.cpu.bitfield.isa = i.tm.cpu_any.bitfield.isa;
3778               else
3779                 gas_assert (i.tm.cpu.bitfield.isa == i.tm.cpu_any.bitfield.isa);
3780             }
3781         }
3782
3783       if ((maybe_cpu (t, CpuCMPCCXADD) || maybe_cpu (t, CpuAMX_TILE)
3784            || maybe_cpu (t, CpuAVX512F) || maybe_cpu (t, CpuAVX512DQ)
3785            || maybe_cpu (t, CpuAVX512BW) || maybe_cpu (t, CpuBMI)
3786            || maybe_cpu (t, CpuBMI2) || maybe_cpu (t, CpuUSER_MSR))
3787           && maybe_cpu (t, CpuAPX_F))
3788         {
3789           if (need_evex_encoding (t))
3790             i.tm.opcode_modifier.vex = 0;
3791           else
3792             i.tm.opcode_modifier.evex = 0;
3793         }
3794     }
3795
3796   /* Note that for pseudo prefixes this produces a length of 1. But for them
3797      the length isn't interesting at all.  */
3798   for (l = 1; l < 4; ++l)
3799     if (!(t->base_opcode >> (8 * l)))
3800       break;
3801
3802   i.opcode_length = l;
3803 }
3804
/* Build the VEX prefix for the current insn, storing its length and
   bytes in i.vex.  The 2-byte form is preferred; to make it usable,
   operands may get swapped first (adjusting i.rex, i.rm and, where
   needed, the installed template).  T is the matched template: its
   operand types are consulted when deriving the vector length, and
   T[1] gets installed when a Load template has its operands swapped.  */

static void
build_vex_prefix (const insn_template *t)
{
  unsigned int register_specifier;
  unsigned int vector_length;
  unsigned int w;

  /* Check register specifier.  The field holds the bitwise inverted
     register number in 4 bits; 0xf is used when there is no such
     register.  */
  if (i.vex.register_specifier)
    {
      register_specifier =
        ~register_number (i.vex.register_specifier) & 0xf;
      gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
    }
  else
    register_specifier = 0xf;

  /* Use 2-byte VEX prefix by swapping destination and source operand
     if there are more than 1 register operand.  */
  if (i.reg_operands > 1
      && i.encoding != encoding_vex3
      && i.dir_encoding == dir_encoding_default
      && i.operands == i.reg_operands
      && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
      && i.tm.opcode_space == SPACE_0F
      && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
      && i.rex == REX_B)
    {
      unsigned int xchg;

      swap_2_operands (0, i.operands - 1);

      gas_assert (i.rm.mode == 3);

      /* Exchange the two ModRM register fields; the sole REX bit that
         was set (REX_B) is replaced by REX_R accordingly.  */
      i.rex = REX_R;
      xchg = i.rm.regmem;
      i.rm.regmem = i.rm.reg;
      i.rm.reg = xchg;

      /* Switch to the opcode for the opposite direction: either by
         flipping the direction bit(s) in the base opcode, or ...  */
      if (i.tm.opcode_modifier.d)
        i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
                            ? Opcode_ExtD : Opcode_SIMD_IntD;
      else /* Use the next insn.  */
        install_template (&t[1]);
    }

  /* Use 2-byte VEX prefix by swapping commutative source operands if there
     are no memory operands and at least 3 register ones.  */
  if (i.reg_operands >= 3
      && i.encoding != encoding_vex3
      && i.reg_operands == i.operands - i.imm_operands
      && i.tm.opcode_modifier.vex
      && i.tm.opcode_modifier.commutative
      /* .commutative aliases .staticrounding; disambiguate.  */
      && !i.tm.opcode_modifier.sae
      && (i.tm.opcode_modifier.sse2avx
          || (optimize > 1 && !i.no_optimize))
      && i.rex == REX_B
      && i.vex.register_specifier
      && !(i.vex.register_specifier->reg_flags & RegRex))
    {
      /* Given the condition above this equals i.imm_operands, i.e. it
         is the index of the first register operand.  */
      unsigned int xchg = i.operands - i.reg_operands;

      gas_assert (i.tm.opcode_space == SPACE_0F);
      gas_assert (!i.tm.opcode_modifier.sae);
      gas_assert (operand_type_equal (&i.types[i.operands - 2],
                                      &i.types[i.operands - 3]));
      gas_assert (i.rm.mode == 3);

      swap_2_operands (xchg, xchg + 1);

      /* The register so far encoded in ModRM.rm (with REX_B set, hence
         the "| 8") and the one so far named by the register specifier
         trade places.  No REX bit is needed afterwards.  */
      i.rex = 0;
      xchg = i.rm.regmem | 8;
      i.rm.regmem = ~register_specifier & 0xf;
      gas_assert (!(i.rm.regmem & 8));
      /* NOTE(review): this pointer adjustment presumably relies on
         register table entries of one class being laid out
         consecutively by register number - confirm against the
         register table.  */
      i.vex.register_specifier += xchg - i.rm.regmem;
      register_specifier = ~xchg & 0xf;
    }

  /* Determine the vector length bit: taken from the -mavxscalar=
     setting for scalar insns, fixed for VEX256 (and, for .insn, VEX128)
     templates, otherwise derived from the operands.  */
  if (i.tm.opcode_modifier.vex == VEXScalar)
    vector_length = avxscalar;
  else if (i.tm.opcode_modifier.vex == VEX256)
    vector_length = 1;
  else if (dot_insn () && i.tm.opcode_modifier.vex == VEX128)
    vector_length = 0;
  else
    {
      unsigned int op;

      /* Determine vector length from the last multi-length vector
         operand.  */
      vector_length = 0;
      for (op = t->operands; op--;)
        if (t->operand_types[op].bitfield.xmmword
            && t->operand_types[op].bitfield.ymmword
            && i.types[op].bitfield.ymmword)
          {
            vector_length = 1;
            break;
          }
    }

  /* Check the REX.W bit and VEXW.  */
  if (i.tm.opcode_modifier.vexw == VEXWIG)
    w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
  else if (i.tm.opcode_modifier.vexw && !(i.rex & REX_W))
    w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
  else
    w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;

  /* Use 2-byte VEX prefix if possible.  This requires W, X and B to all
     be clear, opcode space 0F, and no explicit request for the 3-byte
     form.  */
  if (w == 0
      && i.encoding != encoding_vex3
      && i.tm.opcode_space == SPACE_0F
      && (i.rex & (REX_W | REX_X | REX_B)) == 0)
    {
      /* 2-byte VEX prefix.  */
      unsigned int r;

      i.vex.length = 2;
      i.vex.bytes[0] = 0xc5;

      /* Check the REX.R bit.  It is stored inverted.  */
      r = (i.rex & REX_R) ? 0 : 1;
      i.vex.bytes[1] = (r << 7
                        | register_specifier << 3
                        | vector_length << 2
                        | i.tm.opcode_modifier.opcodeprefix);
    }
  else
    {
      /* 3-byte VEX prefix.  */
      i.vex.length = 3;

      /* The first byte distinguishes VEX (0xc4) from XOP (0x8f); any
         other opcode space must not make it here.  */
      switch (i.tm.opcode_space)
        {
        case SPACE_0F:
        case SPACE_0F38:
        case SPACE_0F3A:
        case SPACE_VEXMAP7:
          i.vex.bytes[0] = 0xc4;
          break;
        case SPACE_XOP08:
        case SPACE_XOP09:
        case SPACE_XOP0A:
          i.vex.bytes[0] = 0x8f;
          break;
        default:
          abort ();
        }

      /* The high 3 bits of the second VEX byte are 1's complement
         of RXB bits from REX.  The low bits hold the opcode space
         (for .insn the one specified there).  */
      i.vex.bytes[1] = ((~i.rex & 7) << 5)
                       | (!dot_insn () ? i.tm.opcode_space
                                       : i.insn_opcode_space);

      i.vex.bytes[2] = (w << 7
                        | register_specifier << 3
                        | vector_length << 2
                        | i.tm.opcode_modifier.opcodeprefix);
    }
}
3970
3971 static INLINE bool
3972 is_any_vex_encoding (const insn_template *t)
3973 {
3974   return t->opcode_modifier.vex || t->opcode_modifier.evex;
3975 }
3976
3977 /* We can use this function only when the current encoding is evex.  */
3978 static INLINE bool
3979 is_apx_evex_encoding (void)
3980 {
3981   return i.rex2 || i.tm.opcode_space == SPACE_EVEXMAP4 || i.has_nf
3982     || (i.vex.register_specifier
3983         && (i.vex.register_specifier->reg_flags & RegRex2));
3984 }
3985
3986 static INLINE bool
3987 is_apx_rex2_encoding (void)
3988 {
3989   return i.rex2 || i.rex2_encoding
3990         || i.tm.opcode_modifier.rex2;
3991 }
3992
3993 static unsigned int
3994 get_broadcast_bytes (const insn_template *t, bool diag)
3995 {
3996   unsigned int op, bytes;
3997   const i386_operand_type *types;
3998
3999   if (i.broadcast.type)
4000     return (1 << (t->opcode_modifier.broadcast - 1)) * i.broadcast.type;
4001
4002   gas_assert (intel_syntax);
4003
4004   for (op = 0; op < t->operands; ++op)
4005     if (t->operand_types[op].bitfield.baseindex)
4006       break;
4007
4008   gas_assert (op < t->operands);
4009
4010   if (t->opcode_modifier.evex != EVEXDYN)
4011     switch (i.broadcast.bytes)
4012       {
4013       case 1:
4014         if (t->operand_types[op].bitfield.word)
4015           return 2;
4016       /* Fall through.  */
4017       case 2:
4018         if (t->operand_types[op].bitfield.dword)
4019           return 4;
4020       /* Fall through.  */
4021       case 4:
4022         if (t->operand_types[op].bitfield.qword)
4023           return 8;
4024       /* Fall through.  */
4025       case 8:
4026         if (t->operand_types[op].bitfield.xmmword)
4027           return 16;
4028         if (t->operand_types[op].bitfield.ymmword)
4029           return 32;
4030         if (t->operand_types[op].bitfield.zmmword)
4031           return 64;
4032       /* Fall through.  */
4033       default:
4034         abort ();
4035       }
4036
4037   gas_assert (op + 1 < t->operands);
4038
4039   if (t->operand_types[op + 1].bitfield.xmmword
4040       + t->operand_types[op + 1].bitfield.ymmword
4041       + t->operand_types[op + 1].bitfield.zmmword > 1)
4042     {
4043       types = &i.types[op + 1];
4044       diag = false;
4045     }
4046   else /* Ambiguous - guess with a preference to non-AVX512VL forms.  */
4047     types = &t->operand_types[op];
4048
4049   if (types->bitfield.zmmword)
4050     bytes = 64;
4051   else if (types->bitfield.ymmword)
4052     bytes = 32;
4053   else
4054     bytes = 16;
4055
4056   if (diag)
4057     as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
4058              insn_name (t), bytes * 8);
4059
4060   return bytes;
4061 }
4062
/* Build the 4-byte EVEX prefix for the current insn, storing its length
   and bytes in i.vex.  All bits of i.vrex are expected to be consumed
   in the process.  If the template's vector length is dynamic
   (EVEXDYN), i.tm.opcode_modifier.evex is updated to the length
   derived from the operands.
   NOTE(review): i.vex.bytes[3] is only conditionally assigned before
   bits get OR-ed into it, so it is presumably zero on entry - confirm
   against the callers.  */

static void
build_evex_prefix (void)
{
  unsigned int register_specifier, w;
  /* Bits of i.vrex which have been accounted for in the prefix.  */
  rex_byte vrex_used = 0;

  /* Check register specifier.  */
  if (i.vex.register_specifier)
    {
      gas_assert ((i.vrex & REX_X) == 0);

      register_specifier = i.vex.register_specifier->reg_num;
      if ((i.vex.register_specifier->reg_flags & RegRex))
        register_specifier += 8;
      /* The upper 16 registers are encoded in the fourth byte of the
         EVEX prefix.  The bit is set when the register is not one of
         them.  */
      if (!(i.vex.register_specifier->reg_flags & RegVRex))
        i.vex.bytes[3] = 0x8;
      /* The low 4 bits of the register number are stored inverted.  */
      register_specifier = ~register_specifier & 0xf;
    }
  else
    {
      register_specifier = 0xf;

      /* Encode upper 16 vector index register in the fourth byte of
         the EVEX prefix.  */
      if (!(i.vrex & REX_X))
        i.vex.bytes[3] = 0x8;
      else
        vrex_used |= REX_X;
    }

  /* 4 byte EVEX prefix.  */
  i.vex.length = 4;
  i.vex.bytes[0] = 0x62;

  /* The high 3 bits of the second EVEX byte are 1's complement of RXB
     bits from REX.  The low bits hold the opcode space (for .insn the
     one specified there).  */
  gas_assert (i.tm.opcode_space >= SPACE_0F);
  gas_assert (i.tm.opcode_space <= SPACE_VEXMAP7);
  i.vex.bytes[1] = ((~i.rex & 7) << 5)
                   | (!dot_insn () ? i.tm.opcode_space
                                   : i.insn_opcode_space);

  /* The fifth bit of the second EVEX byte is 1's complement of the
     REX_R bit in VREX.  */
  if (!(i.vrex & REX_R))
    i.vex.bytes[1] |= 0x10;
  else
    vrex_used |= REX_R;

  if ((i.reg_operands + i.imm_operands) == i.operands)
    {
      /* When all operands are registers, the REX_X bit in REX is not
         used.  We reuse it to encode the upper 16 registers, which is
         indicated by the REX_B bit in VREX.  The REX_X bit is encoded
         as 1's complement.  */
      if ((i.vrex & REX_B))
        {
          vrex_used |= REX_B;
          i.vex.bytes[1] &= ~0x40;
        }
    }

  /* EVEX instructions shouldn't need the REX prefix.  Every VREX bit
     must have been consumed above.  */
  i.vrex &= ~vrex_used;
  gas_assert (i.vrex == 0);

  /* Check the REX.W bit and VEXW.  */
  if (i.tm.opcode_modifier.vexw == VEXWIG)
    w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
  else if (i.tm.opcode_modifier.vexw && !(i.rex & REX_W))
    w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
  else
    w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;

  /* The third byte of the EVEX prefix.  */
  i.vex.bytes[2] = ((w << 7)
                    | (register_specifier << 3)
                    | 4 /* Encode the U bit.  */
                    | i.tm.opcode_modifier.opcodeprefix);

  /* The fourth byte of the EVEX prefix.  */
  /* The zeroing-masking bit.  */
  if (i.mask.reg && i.mask.zeroing)
    i.vex.bytes[3] |= 0x80;

  /* Don't always set the broadcast bit if there is no RC.  */
  if (i.rounding.type == rc_none)
    {
      /* Encode the vector length.  */
      unsigned int vec_length;

      if (i.tm.opcode_modifier.evex == EVEXDYN)
        {
          unsigned int op;

          /* Determine vector length from the last multi-length vector
             operand.  For a broadcast memory operand (which has no
             vector size of its own) the broadcast size is used.  */
          for (op = i.operands; op--;)
            if (i.tm.operand_types[op].bitfield.xmmword
                + i.tm.operand_types[op].bitfield.ymmword
                + i.tm.operand_types[op].bitfield.zmmword > 1)
              {
                if (i.types[op].bitfield.zmmword)
                  {
                    i.tm.opcode_modifier.evex = EVEX512;
                    break;
                  }
                else if (i.types[op].bitfield.ymmword)
                  {
                    i.tm.opcode_modifier.evex = EVEX256;
                    break;
                  }
                else if (i.types[op].bitfield.xmmword)
                  {
                    i.tm.opcode_modifier.evex = EVEX128;
                    break;
                  }
                else if ((i.broadcast.type || i.broadcast.bytes)
                         && op == i.broadcast.operand)
                  {
                    switch (get_broadcast_bytes (&i.tm, true))
                      {
                        case 64:
                          i.tm.opcode_modifier.evex = EVEX512;
                          break;
                        case 32:
                          i.tm.opcode_modifier.evex = EVEX256;
                          break;
                        case 16:
                          i.tm.opcode_modifier.evex = EVEX128;
                          break;
                        default:
                          abort ();
                      }
                    break;
                  }
              }

          /* If the loop ran to completion without finding a length, the
             unsigned OP has wrapped around and hence is out of range.  */
          if (op >= MAX_OPERANDS)
            abort ();
        }

      /* The vector length occupies the two bits starting at bit 5.  */
      switch (i.tm.opcode_modifier.evex)
        {
        case EVEXLIG: /* LL' is ignored */
          vec_length = evexlig << 5;
          break;
        case EVEX128:
          vec_length = 0 << 5;
          break;
        case EVEX256:
          vec_length = 1 << 5;
          break;
        case EVEX512:
          vec_length = 2 << 5;
          break;
        case EVEX_L3:
          /* Only permitted for .insn.  */
          if (dot_insn ())
            {
              vec_length = 3 << 5;
              break;
            }
          /* Fall through.  */
        default:
          abort ();
          break;
        }
      i.vex.bytes[3] |= vec_length;
      /* Encode the broadcast bit.  */
      if (i.broadcast.type || i.broadcast.bytes)
        i.vex.bytes[3] |= 0x10;
    }
  /* With rounding control or SAE, bit 4 is set and the two bits
     otherwise holding the vector length carry the rounding type (for
     SAE-only the value of evexrcig).  */
  else if (i.rounding.type != saeonly)
    i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
  else
    i.vex.bytes[3] |= 0x10 | (evexrcig << 5);

  /* The mask register number goes into the low bits.  */
  if (i.mask.reg)
    i.vex.bytes[3] |= i.mask.reg->reg_num;
}
4247
4248 /* Build (2 bytes) rex2 prefix.
4249    | D5h |
4250    | m | R4 X4 B4 | W R X B |
4251
4252    Rex2 reuses i.vex as they both encode i.tm.opcode_space in their prefixes.
4253  */
4254 static void
4255 build_rex2_prefix (void)
4256 {
4257   i.vex.length = 2;
4258   i.vex.bytes[0] = 0xd5;
4259   /* For the W R X B bits, the variables of rex prefix will be reused.  */
4260   i.vex.bytes[1] = ((i.tm.opcode_space << 7)
4261                     | (i.rex2 << 4) | i.rex);
4262 }
4263
4264 /* Build the EVEX prefix (4-byte) for evex insn
4265    | 62h |
4266    | `R`X`B`R' | B'mmm |
4267    | W | v`v`v`v | `x' | pp |
4268    | z| L'L | b | `v | aaa |
4269 */
4270 static void
4271 build_apx_evex_prefix (void)
4272 {
4273   build_evex_prefix ();
4274   if (i.rex2 & REX_R)
4275     i.vex.bytes[1] &= ~0x10;
4276   if (i.rex2 & REX_B)
4277     i.vex.bytes[1] |= 0x08;
4278   if (i.rex2 & REX_X)
4279     {
4280       gas_assert (i.rm.mode != 3);
4281       i.vex.bytes[2] &= ~0x04;
4282     }
4283   if (i.vex.register_specifier
4284       && i.vex.register_specifier->reg_flags & RegRex2)
4285     i.vex.bytes[3] &= ~0x08;
4286
4287   /* Encode the NDD bit of the instruction promoted from the legacy
4288      space.  */
4289   if (i.vex.register_specifier && i.tm.opcode_space == SPACE_EVEXMAP4)
4290     i.vex.bytes[3] |= 0x10;
4291
4292   /* Encode the NF bit.  */
4293   if (i.has_nf)
4294     i.vex.bytes[3] |= 0x04;
4295 }
4296
/* Finalize the REX state of the current insn and emit the prefix: merge
   in a user-specified REX prefix, account for byte registers which are
   affected by the presence of a REX / REX2 prefix, honor explicit
   requests for REX / REX2 encoding where possible, and finally either
   build the REX2 prefix or add a REX prefix (if any bit ended up
   set).  */
static void establish_rex (void)
{
  /* Note that legacy encodings have at most 2 non-immediate operands.  */
  unsigned int first = i.imm_operands;
  /* NOTE(review): whenever FIRST is non-zero this is not the index of
     the last operand; e.g. one immediate plus one further operand
     yields FIRST == 1 and LAST == 0, in which case neither of the
     loops below iterates - confirm this is intended.  */
  unsigned int last = i.operands > first ? i.operands - first - 1 : first;

  /* Respect a user-specified REX prefix.  */
  i.rex |= i.prefix[REX_PREFIX] & REX_OPCODE;

  /* For 8 bit registers we need an empty rex prefix.  Also if the
     instruction already has a prefix, we need to convert old
     registers to new ones.  */

  if ((i.types[first].bitfield.class == Reg && i.types[first].bitfield.byte
       && ((i.op[first].regs->reg_flags & RegRex64) != 0 || i.rex != 0
           || i.rex2 != 0))
      || (i.types[last].bitfield.class == Reg && i.types[last].bitfield.byte
          && ((i.op[last].regs->reg_flags & RegRex64) != 0 || i.rex != 0
              || i.rex2 != 0)))
    {
      unsigned int x;

      /* REX2 and VEX/EVEX encodings don't get the plain REX marker.  */
      if (!is_apx_rex2_encoding () && !is_any_vex_encoding(&i.tm))
        i.rex |= REX_OPCODE;
      for (x = first; x <= last; x++)
        {
          /* Look for 8 bit operand that uses old registers.  */
          if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
              && (i.op[x].regs->reg_flags & RegRex64) == 0)
            {
              gas_assert (!(i.op[x].regs->reg_flags & RegRex));
              /* In case it is "hi" register, give up.  */
              if (i.op[x].regs->reg_num > 3)
                as_bad (_("can't encode register '%s%s' in an "
                          "instruction requiring REX/REX2 prefix"),
                        register_prefix, i.op[x].regs->reg_name);

              /* Otherwise it is equivalent to the extended register.
                 Since the encoding doesn't change this is merely
                 cosmetic cleanup for debug output.  */
              i.op[x].regs += 8;
            }
        }
    }

  /* No REX / REX2 bit is needed, but use of such a prefix was
     explicitly requested.  */
  if (i.rex == 0 && i.rex2 == 0 && (i.rex_encoding || i.rex2_encoding))
    {
      /* Check if we can add a REX_OPCODE byte.  Look for 8 bit operand
         that uses legacy register.  If it is "hi" register, don't add
         rex and rex2 prefix.  */
      unsigned int x;

      for (x = first; x <= last; x++)
        if (i.types[x].bitfield.class == Reg
            && i.types[x].bitfield.byte
            && (i.op[x].regs->reg_flags & RegRex64) == 0
            && i.op[x].regs->reg_num > 3)
          {
            gas_assert (!(i.op[x].regs->reg_flags & RegRex));
            i.rex_encoding = false;
            i.rex2_encoding = false;
            break;
          }

      if (i.rex_encoding)
        i.rex = REX_OPCODE;
    }

  /* Emit whichever prefix is called for: REX2 goes into i.vex, a plain
     REX is recorded via add_prefix ().  */
  if (is_apx_rex2_encoding ())
    {
      build_rex2_prefix ();
      /* The individual REX.RXBW bits got consumed.  */
      i.rex &= REX_OPCODE;
    }
  else if (i.rex != 0)
    add_prefix (REX_OPCODE | i.rex);
}
4374
4375 static void
4376 process_immext (void)
4377 {
4378   expressionS *exp;
4379
4380   /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4381      which is coded in the same place as an 8-bit immediate field
4382      would be.  Here we fake an 8-bit immediate operand from the
4383      opcode suffix stored in tm.extension_opcode.
4384
4385      AVX instructions also use this encoding, for some of
4386      3 argument instructions.  */
4387
4388   gas_assert (i.imm_operands <= 1
4389               && (i.operands <= 2
4390                   || (is_any_vex_encoding (&i.tm)
4391                       && i.operands <= 4)));
4392
4393   exp = &im_expressions[i.imm_operands++];
4394   i.op[i.operands].imms = exp;
4395   i.types[i.operands].bitfield.imm8 = 1;
4396   i.operands++;
4397   exp->X_op = O_constant;
4398   exp->X_add_number = i.tm.extension_opcode;
4399   i.tm.extension_opcode = None;
4400 }
4401
4402
4403 static int
4404 check_hle (void)
4405 {
4406   switch (i.tm.opcode_modifier.prefixok)
4407     {
4408     default:
4409       abort ();
4410     case PrefixLock:
4411     case PrefixNone:
4412     case PrefixNoTrack:
4413     case PrefixRep:
4414       as_bad (_("invalid instruction `%s' after `%s'"),
4415               insn_name (&i.tm), i.hle_prefix);
4416       return 0;
4417     case PrefixHLELock:
4418       if (i.prefix[LOCK_PREFIX])
4419         return 1;
4420       as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4421       return 0;
4422     case PrefixHLEAny:
4423       return 1;
4424     case PrefixHLERelease:
4425       if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4426         {
4427           as_bad (_("instruction `%s' after `xacquire' not allowed"),
4428                   insn_name (&i.tm));
4429           return 0;
4430         }
4431       if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4432         {
4433           as_bad (_("memory destination needed for instruction `%s'"
4434                     " after `xrelease'"), insn_name (&i.tm));
4435           return 0;
4436         }
4437       return 1;
4438     }
4439 }
4440
4441 /* Encode aligned vector move as unaligned vector move.  */
4442
4443 static void
4444 encode_with_unaligned_vector_move (void)
4445 {
4446   switch (i.tm.base_opcode)
4447     {
4448     case 0x28:  /* Load instructions.  */
4449     case 0x29:  /* Store instructions.  */
4450       /* movaps/movapd/vmovaps/vmovapd.  */
4451       if (i.tm.opcode_space == SPACE_0F
4452           && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4453         i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4454       break;
4455     case 0x6f:  /* Load instructions.  */
4456     case 0x7f:  /* Store instructions.  */
4457       /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4458       if (i.tm.opcode_space == SPACE_0F
4459           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4460         i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4461       break;
4462     default:
4463       break;
4464     }
4465 }
4466
4467 /* Try the shortest encoding by shortening operand size.  */
4468
4469 static void
4470 optimize_encoding (void)
4471 {
4472   unsigned int j;
4473
4474   if (i.tm.mnem_off == MN_lea)
4475     {
4476       /* Optimize: -O:
4477            lea symbol, %rN    -> mov $symbol, %rN
4478            lea (%rM), %rN     -> mov %rM, %rN
4479            lea (,%rM,1), %rN  -> mov %rM, %rN
4480
4481            and in 32-bit mode for 16-bit addressing
4482
4483            lea (%rM), %rN     -> movzx %rM, %rN
4484
4485            and in 64-bit mode zap 32-bit addressing in favor of using a
4486            32-bit (or less) destination.
4487        */
4488       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4489         {
4490           if (!i.op[1].regs->reg_type.bitfield.word)
4491             i.tm.opcode_modifier.size = SIZE32;
4492           i.prefix[ADDR_PREFIX] = 0;
4493         }
4494
4495       if (!i.index_reg && !i.base_reg)
4496         {
4497           /* Handle:
4498                lea symbol, %rN    -> mov $symbol, %rN
4499            */
4500           if (flag_code == CODE_64BIT)
4501             {
4502               /* Don't transform a relocation to a 16-bit one.  */
4503               if (i.op[0].disps
4504                   && i.op[0].disps->X_op != O_constant
4505                   && i.op[1].regs->reg_type.bitfield.word)
4506                 return;
4507
4508               if (!i.op[1].regs->reg_type.bitfield.qword
4509                   || i.tm.opcode_modifier.size == SIZE32)
4510                 {
4511                   i.tm.base_opcode = 0xb8;
4512                   i.tm.opcode_modifier.modrm = 0;
4513                   if (!i.op[1].regs->reg_type.bitfield.word)
4514                     i.types[0].bitfield.imm32 = 1;
4515                   else
4516                     {
4517                       i.tm.opcode_modifier.size = SIZE16;
4518                       i.types[0].bitfield.imm16 = 1;
4519                     }
4520                 }
4521               else
4522                 {
4523                   /* Subject to further optimization below.  */
4524                   i.tm.base_opcode = 0xc7;
4525                   i.tm.extension_opcode = 0;
4526                   i.types[0].bitfield.imm32s = 1;
4527                   i.types[0].bitfield.baseindex = 0;
4528                 }
4529             }
4530           /* Outside of 64-bit mode address and operand sizes have to match if
4531              a relocation is involved, as otherwise we wouldn't (currently) or
4532              even couldn't express the relocation correctly.  */
4533           else if (i.op[0].disps
4534                    && i.op[0].disps->X_op != O_constant
4535                    && ((!i.prefix[ADDR_PREFIX])
4536                        != (flag_code == CODE_32BIT
4537                            ? i.op[1].regs->reg_type.bitfield.dword
4538                            : i.op[1].regs->reg_type.bitfield.word)))
4539             return;
4540           /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4541              destination is going to grow encoding size.  */
4542           else if (flag_code == CODE_16BIT
4543                    && (optimize <= 1 || optimize_for_space)
4544                    && !i.prefix[ADDR_PREFIX]
4545                    && i.op[1].regs->reg_type.bitfield.dword)
4546             return;
4547           else
4548             {
4549               i.tm.base_opcode = 0xb8;
4550               i.tm.opcode_modifier.modrm = 0;
4551               if (i.op[1].regs->reg_type.bitfield.dword)
4552                 i.types[0].bitfield.imm32 = 1;
4553               else
4554                 i.types[0].bitfield.imm16 = 1;
4555
4556               if (i.op[0].disps
4557                   && i.op[0].disps->X_op == O_constant
4558                   && i.op[1].regs->reg_type.bitfield.dword
4559                   /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4560                      GCC 5. */
4561                   && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4562                 i.op[0].disps->X_add_number &= 0xffff;
4563             }
4564
4565           i.tm.operand_types[0] = i.types[0];
4566           i.imm_operands = 1;
4567           if (!i.op[0].imms)
4568             {
4569               i.op[0].imms = &im_expressions[0];
4570               i.op[0].imms->X_op = O_absent;
4571             }
4572         }
4573       else if (i.op[0].disps
4574                   && (i.op[0].disps->X_op != O_constant
4575                       || i.op[0].disps->X_add_number))
4576         return;
4577       else
4578         {
4579           /* Handle:
4580                lea (%rM), %rN     -> mov %rM, %rN
4581                lea (,%rM,1), %rN  -> mov %rM, %rN
4582                lea (%rM), %rN     -> movzx %rM, %rN
4583            */
4584           const reg_entry *addr_reg;
4585
4586           if (!i.index_reg && i.base_reg->reg_num != RegIP)
4587             addr_reg = i.base_reg;
4588           else if (!i.base_reg
4589                    && i.index_reg->reg_num != RegIZ
4590                    && !i.log2_scale_factor)
4591             addr_reg = i.index_reg;
4592           else
4593             return;
4594
4595           if (addr_reg->reg_type.bitfield.word
4596               && i.op[1].regs->reg_type.bitfield.dword)
4597             {
4598               if (flag_code != CODE_32BIT)
4599                 return;
4600               i.tm.opcode_space = SPACE_0F;
4601               i.tm.base_opcode = 0xb7;
4602             }
4603           else
4604             i.tm.base_opcode = 0x8b;
4605
4606           if (addr_reg->reg_type.bitfield.dword
4607               && i.op[1].regs->reg_type.bitfield.qword)
4608             i.tm.opcode_modifier.size = SIZE32;
4609
4610           i.op[0].regs = addr_reg;
4611           i.reg_operands = 2;
4612         }
4613
4614       i.mem_operands = 0;
4615       i.disp_operands = 0;
4616       i.prefix[ADDR_PREFIX] = 0;
4617       i.prefix[SEG_PREFIX] = 0;
4618       i.seg[0] = NULL;
4619     }
4620
4621   if (optimize_for_space
4622       && i.tm.mnem_off == MN_test
4623       && i.reg_operands == 1
4624       && i.imm_operands == 1
4625       && !i.types[1].bitfield.byte
4626       && i.op[0].imms->X_op == O_constant
4627       && fits_in_imm7 (i.op[0].imms->X_add_number))
4628     {
4629       /* Optimize: -Os:
4630            test $imm7, %r64/%r32/%r16  -> test $imm7, %r8
4631        */
4632       unsigned int base_regnum = i.op[1].regs->reg_num;
4633       if (flag_code == CODE_64BIT || base_regnum < 4)
4634         {
4635           i.types[1].bitfield.byte = 1;
4636           /* Ignore the suffix.  */
4637           i.suffix = 0;
4638           /* Convert to byte registers. 8-bit registers are special,
4639              RegRex64 and non-RegRex64 each have 8 registers.  */
4640           if (i.types[1].bitfield.word)
4641             /* 32 (or 40) 8-bit registers.  */
4642             j = 32;
4643           else if (i.types[1].bitfield.dword)
4644             /* 32 (or 40) 8-bit registers + 32 16-bit registers.  */
4645             j = 64;
4646           else
4647             /* 32 (or 40) 8-bit registers + 32 16-bit registers
4648                + 32 32-bit registers.  */
4649             j = 96;
4650
4651           /* In 64-bit mode, the following byte registers cannot be accessed
4652              if using the Rex and Rex2 prefix: AH, BH, CH, DH */
4653           if (!(i.op[1].regs->reg_flags & (RegRex | RegRex2)) && base_regnum < 4)
4654             j += 8;
4655           i.op[1].regs -= j;
4656         }
4657     }
4658   else if (flag_code == CODE_64BIT
4659            && i.tm.opcode_space == SPACE_BASE
4660            && ((i.types[1].bitfield.qword
4661                 && i.reg_operands == 1
4662                 && i.imm_operands == 1
4663                 && i.op[0].imms->X_op == O_constant
4664                 && ((i.tm.base_opcode == 0xb8
4665                      && i.tm.extension_opcode == None
4666                      && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4667                     || (fits_in_imm31 (i.op[0].imms->X_add_number)
4668                         && (i.tm.base_opcode == 0x24
4669                             || (i.tm.base_opcode == 0x80
4670                                 && i.tm.extension_opcode == 0x4)
4671                             || i.tm.mnem_off == MN_test
4672                             || ((i.tm.base_opcode | 1) == 0xc7
4673                                 && i.tm.extension_opcode == 0x0)))
4674                     || (fits_in_imm7 (i.op[0].imms->X_add_number)
4675                         && i.tm.base_opcode == 0x83
4676                         && i.tm.extension_opcode == 0x4)))
4677                || (i.types[0].bitfield.qword
4678                    && ((i.reg_operands == 2
4679                         && i.op[0].regs == i.op[1].regs
4680                         && (i.tm.mnem_off == MN_xor
4681                             || i.tm.mnem_off == MN_sub))
4682                        || i.tm.mnem_off == MN_clr))))
4683     {
4684       /* Optimize: -O:
4685            andq $imm31, %r64   -> andl $imm31, %r32
4686            andq $imm7, %r64    -> andl $imm7, %r32
4687            testq $imm31, %r64  -> testl $imm31, %r32
4688            xorq %r64, %r64     -> xorl %r32, %r32
4689            subq %r64, %r64     -> subl %r32, %r32
4690            movq $imm31, %r64   -> movl $imm31, %r32
4691            movq $imm32, %r64   -> movl $imm32, %r32
4692         */
4693       i.tm.opcode_modifier.size = SIZE32;
4694       if (i.imm_operands)
4695         {
4696           i.types[0].bitfield.imm32 = 1;
4697           i.types[0].bitfield.imm32s = 0;
4698           i.types[0].bitfield.imm64 = 0;
4699         }
4700       else
4701         {
4702           i.types[0].bitfield.dword = 1;
4703           i.types[0].bitfield.qword = 0;
4704         }
4705       i.types[1].bitfield.dword = 1;
4706       i.types[1].bitfield.qword = 0;
4707       if (i.tm.mnem_off == MN_mov || i.tm.mnem_off == MN_lea)
4708         {
4709           /* Handle
4710                movq $imm31, %r64   -> movl $imm31, %r32
4711                movq $imm32, %r64   -> movl $imm32, %r32
4712            */
4713           i.tm.operand_types[0].bitfield.imm32 = 1;
4714           i.tm.operand_types[0].bitfield.imm32s = 0;
4715           i.tm.operand_types[0].bitfield.imm64 = 0;
4716           if ((i.tm.base_opcode | 1) == 0xc7)
4717             {
4718               /* Handle
4719                    movq $imm31, %r64   -> movl $imm31, %r32
4720                */
4721               i.tm.base_opcode = 0xb8;
4722               i.tm.extension_opcode = None;
4723               i.tm.opcode_modifier.w = 0;
4724               i.tm.opcode_modifier.modrm = 0;
4725             }
4726         }
4727     }
4728   else if (i.reg_operands == 3
4729            && i.op[0].regs == i.op[1].regs
4730            && i.encoding != encoding_evex
4731            && (i.tm.mnem_off == MN_xor
4732                || i.tm.mnem_off == MN_sub))
4733     {
4734       /* Optimize: -O:
4735            xorb %rNb, %rNb, %rMb  -> xorl %rMd, %rMd
4736            xorw %rNw, %rNw, %rMw  -> xorl %rMd, %rMd
4737            xorl %rNd, %rNd, %rMd  -> xorl %rMd, %rMd
4738            xorq %rN,  %rN,  %rM   -> xorl %rMd, %rMd
4739            subb %rNb, %rNb, %rMb  -> subl %rMd, %rMd
4740            subw %rNw, %rNw, %rMw  -> subl %rMd, %rMd
4741            subl %rNd, %rNd, %rMd  -> subl %rMd, %rMd
4742            subq %rN,  %rN,  %rM   -> subl %rMd, %rMd
4743         */
4744       i.tm.opcode_space = SPACE_BASE;
4745       i.tm.opcode_modifier.evex = 0;
4746       i.tm.opcode_modifier.size = SIZE32;
4747       i.types[0].bitfield.byte = 0;
4748       i.types[0].bitfield.word = 0;
4749       i.types[0].bitfield.dword = 1;
4750       i.types[0].bitfield.qword = 0;
4751       i.op[0].regs = i.op[2].regs;
4752       i.types[1] = i.types[0];
4753       i.op[1].regs = i.op[2].regs;
4754       i.reg_operands = 2;
4755     }
4756   else if (optimize > 1
4757            && !optimize_for_space
4758            && i.reg_operands == 2
4759            && i.op[0].regs == i.op[1].regs
4760            && (i.tm.mnem_off == MN_and || i.tm.mnem_off == MN_or)
4761            && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4762     {
4763       /* Optimize: -O2:
4764            andb %rN, %rN  -> testb %rN, %rN
4765            andw %rN, %rN  -> testw %rN, %rN
4766            andq %rN, %rN  -> testq %rN, %rN
4767            orb %rN, %rN   -> testb %rN, %rN
4768            orw %rN, %rN   -> testw %rN, %rN
4769            orq %rN, %rN   -> testq %rN, %rN
4770
4771            and outside of 64-bit mode
4772
4773            andl %rN, %rN  -> testl %rN, %rN
4774            orl %rN, %rN   -> testl %rN, %rN
4775        */
4776       i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4777     }
4778   else if (i.tm.base_opcode == 0xba
4779            && i.tm.opcode_space == SPACE_0F
4780            && i.reg_operands == 1
4781            && i.op[0].imms->X_op == O_constant
4782            && i.op[0].imms->X_add_number >= 0)
4783     {
4784       /* Optimize: -O:
4785            btw $n, %rN -> btl $n, %rN (outside of 16-bit mode, n < 16)
4786            btq $n, %rN -> btl $n, %rN (in 64-bit mode, n < 32, N < 8)
4787            btl $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
4788
4789            With <BT> one of bts, btr, and btc also:
4790            <BT>w $n, %rN -> btl $n, %rN (in 32-bit mode, n < 16)
4791            <BT>l $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
4792        */
4793       switch (flag_code)
4794         {
4795         case CODE_64BIT:
4796           if (i.tm.extension_opcode != 4)
4797             break;
4798           if (i.types[1].bitfield.qword
4799               && i.op[0].imms->X_add_number < 32
4800               && !(i.op[1].regs->reg_flags & RegRex))
4801             i.tm.opcode_modifier.size = SIZE32;
4802           /* Fall through.  */
4803         case CODE_32BIT:
4804           if (i.types[1].bitfield.word
4805               && i.op[0].imms->X_add_number < 16)
4806             i.tm.opcode_modifier.size = SIZE32;
4807           break;
4808         case CODE_16BIT:
4809           if (i.op[0].imms->X_add_number < 16)
4810             i.tm.opcode_modifier.size = SIZE16;
4811           break;
4812         }
4813     }
4814   else if (i.reg_operands == 3
4815            && i.op[0].regs == i.op[1].regs
4816            && !i.types[2].bitfield.xmmword
4817            && (i.tm.opcode_modifier.vex
4818                || ((!i.mask.reg || i.mask.zeroing)
4819                    && i.tm.opcode_modifier.evex
4820                    && (i.encoding != encoding_evex
4821                        || cpu_arch_isa_flags.bitfield.cpuavx512vl
4822                        || is_cpu (&i.tm, CpuAVX512VL)
4823                        || (i.tm.operand_types[2].bitfield.zmmword
4824                            && i.types[2].bitfield.ymmword))))
4825            && i.tm.opcode_space == SPACE_0F
4826            && ((i.tm.base_opcode | 2) == 0x57
4827                || i.tm.base_opcode == 0xdf
4828                || i.tm.base_opcode == 0xef
4829                || (i.tm.base_opcode | 3) == 0xfb
4830                || i.tm.base_opcode == 0x42
4831                || i.tm.base_opcode == 0x47))
4832     {
4833       /* Optimize: -O1:
4834            VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4835            vpsubq and vpsubw:
4836              EVEX VOP %zmmM, %zmmM, %zmmN
4837                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4838                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4839              EVEX VOP %ymmM, %ymmM, %ymmN
4840                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4841                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4842              VEX VOP %ymmM, %ymmM, %ymmN
4843                -> VEX VOP %xmmM, %xmmM, %xmmN
4844            VOP, one of vpandn and vpxor:
4845              VEX VOP %ymmM, %ymmM, %ymmN
4846                -> VEX VOP %xmmM, %xmmM, %xmmN
4847            VOP, one of vpandnd and vpandnq:
4848              EVEX VOP %zmmM, %zmmM, %zmmN
4849                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4850                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4851              EVEX VOP %ymmM, %ymmM, %ymmN
4852                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4853                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4854            VOP, one of vpxord and vpxorq:
4855              EVEX VOP %zmmM, %zmmM, %zmmN
4856                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4857                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4858              EVEX VOP %ymmM, %ymmM, %ymmN
4859                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4860                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4861            VOP, one of kxord and kxorq:
4862              VEX VOP %kM, %kM, %kN
4863                -> VEX kxorw %kM, %kM, %kN
4864            VOP, one of kandnd and kandnq:
4865              VEX VOP %kM, %kM, %kN
4866                -> VEX kandnw %kM, %kM, %kN
4867        */
4868       if (i.tm.opcode_modifier.evex)
4869         {
4870           if (i.encoding != encoding_evex)
4871             {
4872               i.tm.opcode_modifier.vex = VEX128;
4873               i.tm.opcode_modifier.vexw = VEXW0;
4874               i.tm.opcode_modifier.evex = 0;
4875               i.encoding = encoding_vex;
4876               i.mask.reg = NULL;
4877             }
4878           else if (optimize > 1)
4879             i.tm.opcode_modifier.evex = EVEX128;
4880           else
4881             return;
4882         }
4883       else if (i.tm.operand_types[0].bitfield.class == RegMask)
4884         {
4885           i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4886           i.tm.opcode_modifier.vexw = VEXW0;
4887         }
4888       else
4889         i.tm.opcode_modifier.vex = VEX128;
4890
4891       if (i.tm.opcode_modifier.vex)
4892         for (j = 0; j < 3; j++)
4893           {
4894             i.types[j].bitfield.xmmword = 1;
4895             i.types[j].bitfield.ymmword = 0;
4896           }
4897     }
4898   else if (i.encoding != encoding_evex
4899            && i.encoding != encoding_egpr
4900            && !i.types[0].bitfield.zmmword
4901            && !i.types[1].bitfield.zmmword
4902            && !i.mask.reg
4903            && !i.broadcast.type
4904            && !i.broadcast.bytes
4905            && i.tm.opcode_modifier.evex
4906            && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4907                || (i.tm.base_opcode & ~4) == 0xdb
4908                || (i.tm.base_opcode & ~4) == 0xeb)
4909            && i.tm.extension_opcode == None)
4910     {
4911       /* Optimize: -O1:
4912            VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4913            vmovdqu32 and vmovdqu64:
4914              EVEX VOP %xmmM, %xmmN
4915                -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4916              EVEX VOP %ymmM, %ymmN
4917                -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4918              EVEX VOP %xmmM, mem
4919                -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4920              EVEX VOP %ymmM, mem
4921                -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4922              EVEX VOP mem, %xmmN
4923                -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4924              EVEX VOP mem, %ymmN
4925                -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4926            VOP, one of vpand, vpandn, vpor, vpxor:
4927              EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4928                -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4929              EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4930                -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4931              EVEX VOP{d,q} mem, %xmmM, %xmmN
4932                -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4933              EVEX VOP{d,q} mem, %ymmM, %ymmN
4934                -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4935        */
4936       for (j = 0; j < i.operands; j++)
4937         if (operand_type_check (i.types[j], disp)
4938             && i.op[j].disps->X_op == O_constant)
4939           {
4940             /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4941                has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4942                bytes, we choose EVEX Disp8 over VEX Disp32.  */
4943             int evex_disp8, vex_disp8;
4944             unsigned int memshift = i.memshift;
4945             offsetT n = i.op[j].disps->X_add_number;
4946
4947             evex_disp8 = fits_in_disp8 (n);
4948             i.memshift = 0;
4949             vex_disp8 = fits_in_disp8 (n);
4950             if (evex_disp8 != vex_disp8)
4951               {
4952                 i.memshift = memshift;
4953                 return;
4954               }
4955
4956             i.types[j].bitfield.disp8 = vex_disp8;
4957             break;
4958           }
4959       if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4960           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4961         i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4962       i.tm.opcode_modifier.vex
4963         = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4964       i.tm.opcode_modifier.vexw = VEXW0;
4965       /* VPAND, VPOR, and VPXOR are commutative.  */
4966       if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4967         i.tm.opcode_modifier.commutative = 1;
4968       i.tm.opcode_modifier.evex = 0;
4969       i.tm.opcode_modifier.masking = 0;
4970       i.tm.opcode_modifier.broadcast = 0;
4971       i.tm.opcode_modifier.disp8memshift = 0;
4972       i.memshift = 0;
4973       if (j < i.operands)
4974         i.types[j].bitfield.disp8
4975           = fits_in_disp8 (i.op[j].disps->X_add_number);
4976     }
4977   else if (optimize_for_space
4978            && i.tm.base_opcode == 0x29
4979            && i.tm.opcode_space == SPACE_0F38
4980            && i.operands == i.reg_operands
4981            && i.op[0].regs == i.op[1].regs
4982            && (!i.tm.opcode_modifier.vex
4983                || !(i.op[0].regs->reg_flags & RegRex))
4984            && !i.tm.opcode_modifier.evex)
4985     {
4986       /* Optimize: -Os:
4987          pcmpeqq %xmmN, %xmmN          -> pcmpeqd %xmmN, %xmmN
4988          vpcmpeqq %xmmN, %xmmN, %xmmM  -> vpcmpeqd %xmmN, %xmmN, %xmmM (N < 8)
4989          vpcmpeqq %ymmN, %ymmN, %ymmM  -> vpcmpeqd %ymmN, %ymmN, %ymmM (N < 8)
4990        */
4991       i.tm.opcode_space = SPACE_0F;
4992       i.tm.base_opcode = 0x76;
4993     }
4994   else if (((i.tm.base_opcode >= 0x64
4995              && i.tm.base_opcode <= 0x66
4996              && i.tm.opcode_space == SPACE_0F)
4997             || (i.tm.base_opcode == 0x37
4998                 && i.tm.opcode_space == SPACE_0F38))
4999            && i.operands == i.reg_operands
5000            && i.op[0].regs == i.op[1].regs
5001            && !i.tm.opcode_modifier.evex)
5002     {
5003       /* Optimize: -O:
5004          pcmpgt[bwd] %mmN, %mmN             -> pxor %mmN, %mmN
5005          pcmpgt[bwdq] %xmmN, %xmmN          -> pxor %xmmN, %xmmN
5006          vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM  -> vpxor %xmmN, %xmmN, %xmmM (N < 8)
5007          vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM  -> vpxor %xmm0, %xmm0, %xmmM (N > 7)
5008          vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM  -> vpxor %ymmN, %ymmN, %ymmM (N < 8)
5009          vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM  -> vpxor %ymm0, %ymm0, %ymmM (N > 7)
5010        */
5011       i.tm.opcode_space = SPACE_0F;
5012       i.tm.base_opcode = 0xef;
5013       if (i.tm.opcode_modifier.vex && (i.op[0].regs->reg_flags & RegRex))
5014         {
5015           if (i.operands == 2)
5016             {
5017               gas_assert (i.tm.opcode_modifier.sse2avx);
5018
5019               i.operands = 3;
5020               i.reg_operands = 3;
5021               i.tm.operands = 3;
5022
5023               i.op[2].regs = i.op[0].regs;
5024               i.types[2] = i.types[0];
5025               i.flags[2] = i.flags[0];
5026               i.tm.operand_types[2] = i.tm.operand_types[0];
5027
5028               i.tm.opcode_modifier.sse2avx = 0;
5029             }
5030           i.op[0].regs -= i.op[0].regs->reg_num + 8;
5031           i.op[1].regs = i.op[0].regs;
5032         }
5033     }
5034   else if (optimize_for_space
5035            && i.tm.base_opcode == 0x59
5036            && i.tm.opcode_space == SPACE_0F38
5037            && i.operands == i.reg_operands
5038            && i.tm.opcode_modifier.vex
5039            && !(i.op[0].regs->reg_flags & RegRex)
5040            && i.op[0].regs->reg_type.bitfield.xmmword
5041            && i.encoding != encoding_vex3)
5042     {
5043       /* Optimize: -Os:
5044          vpbroadcastq %xmmN, %xmmM  -> vpunpcklqdq %xmmN, %xmmN, %xmmM (N < 8)
5045        */
5046       i.tm.opcode_space = SPACE_0F;
5047       i.tm.base_opcode = 0x6c;
5048       i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
5049
5050       ++i.operands;
5051       ++i.reg_operands;
5052       ++i.tm.operands;
5053
5054       i.op[2].regs = i.op[0].regs;
5055       i.types[2] = i.types[0];
5056       i.flags[2] = i.flags[0];
5057       i.tm.operand_types[2] = i.tm.operand_types[0];
5058
5059       swap_2_operands (1, 2);
5060     }
5061 }
5062
5063 static void
5064 s_noopt (int dummy ATTRIBUTE_UNUSED)
5065 {
5066   if (!is_it_end_of_statement ())
5067     as_warn (_("`.noopt' arguments ignored"));
5068
5069   optimize = 0;
5070   optimize_for_space = 0;
5071
5072   ignore_rest_of_line ();
5073 }
5074
5075 /* Return non-zero for load instruction.  */
5076
5077 static int
5078 load_insn_p (void)
5079 {
5080   unsigned int dest;
5081   int any_vex_p = is_any_vex_encoding (&i.tm);
5082   unsigned int base_opcode = i.tm.base_opcode | 1;
5083
5084   if (!any_vex_p)
5085     {
5086       /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
5087          bndcn, bndstx, bndldx, clflushopt, clwb, cldemote.  */
5088       if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
5089         return 0;
5090
5091       /* pop.   */
5092       if (i.tm.mnem_off == MN_pop)
5093         return 1;
5094     }
5095
5096   if (i.tm.opcode_space == SPACE_BASE)
5097     {
5098       /* popf, popa.   */
5099       if (i.tm.base_opcode == 0x9d
5100           || i.tm.base_opcode == 0x61)
5101         return 1;
5102
5103       /* movs, cmps, lods, scas.  */
5104       if ((i.tm.base_opcode | 0xb) == 0xaf)
5105         return 1;
5106
5107       /* outs, xlatb.  */
5108       if (base_opcode == 0x6f
5109           || i.tm.base_opcode == 0xd7)
5110         return 1;
5111       /* NB: For AMD-specific insns with implicit memory operands,
5112          they're intentionally not covered.  */
5113     }
5114
5115   /* No memory operand.  */
5116   if (!i.mem_operands)
5117     return 0;
5118
5119   if (any_vex_p)
5120     {
5121       if (i.tm.mnem_off == MN_vldmxcsr)
5122         return 1;
5123     }
5124   else if (i.tm.opcode_space == SPACE_BASE)
5125     {
5126       /* test, not, neg, mul, imul, div, idiv.  */
5127       if (base_opcode == 0xf7 && i.tm.extension_opcode != 1)
5128         return 1;
5129
5130       /* inc, dec.  */
5131       if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
5132         return 1;
5133
5134       /* add, or, adc, sbb, and, sub, xor, cmp.  */
5135       if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
5136         return 1;
5137
5138       /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
5139       if ((base_opcode == 0xc1 || (base_opcode | 2) == 0xd3)
5140           && i.tm.extension_opcode != 6)
5141         return 1;
5142
5143       /* Check for x87 instructions.  */
5144       if ((base_opcode | 6) == 0xdf)
5145         {
5146           /* Skip fst, fstp, fstenv, fstcw.  */
5147           if (i.tm.base_opcode == 0xd9
5148               && (i.tm.extension_opcode == 2
5149                   || i.tm.extension_opcode == 3
5150                   || i.tm.extension_opcode == 6
5151                   || i.tm.extension_opcode == 7))
5152             return 0;
5153
5154           /* Skip fisttp, fist, fistp, fstp.  */
5155           if (i.tm.base_opcode == 0xdb
5156               && (i.tm.extension_opcode == 1
5157                   || i.tm.extension_opcode == 2
5158                   || i.tm.extension_opcode == 3
5159                   || i.tm.extension_opcode == 7))
5160             return 0;
5161
5162           /* Skip fisttp, fst, fstp, fsave, fstsw.  */
5163           if (i.tm.base_opcode == 0xdd
5164               && (i.tm.extension_opcode == 1
5165                   || i.tm.extension_opcode == 2
5166                   || i.tm.extension_opcode == 3
5167                   || i.tm.extension_opcode == 6
5168                   || i.tm.extension_opcode == 7))
5169             return 0;
5170
5171           /* Skip fisttp, fist, fistp, fbstp, fistp.  */
5172           if (i.tm.base_opcode == 0xdf
5173               && (i.tm.extension_opcode == 1
5174                   || i.tm.extension_opcode == 2
5175                   || i.tm.extension_opcode == 3
5176                   || i.tm.extension_opcode == 6
5177                   || i.tm.extension_opcode == 7))
5178             return 0;
5179
5180           return 1;
5181         }
5182     }
5183   else if (i.tm.opcode_space == SPACE_0F)
5184     {
5185       /* bt, bts, btr, btc.  */
5186       if (i.tm.base_opcode == 0xba
5187           && (i.tm.extension_opcode | 3) == 7)
5188         return 1;
5189
5190       /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld.  */
5191       if (i.tm.base_opcode == 0xc7
5192           && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
5193           && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
5194               || i.tm.extension_opcode == 6))
5195         return 1;
5196
5197       /* fxrstor, ldmxcsr, xrstor.  */
5198       if (i.tm.base_opcode == 0xae
5199           && (i.tm.extension_opcode == 1
5200               || i.tm.extension_opcode == 2
5201               || i.tm.extension_opcode == 5))
5202         return 1;
5203
5204       /* lgdt, lidt, lmsw.  */
5205       if (i.tm.base_opcode == 0x01
5206           && (i.tm.extension_opcode == 2
5207               || i.tm.extension_opcode == 3
5208               || i.tm.extension_opcode == 6))
5209         return 1;
5210     }
5211
5212   dest = i.operands - 1;
5213
5214   /* Check fake imm8 operand and 3 source operands.  */
5215   if ((i.tm.opcode_modifier.immext
5216        || i.reg_operands + i.mem_operands == 4)
5217       && i.types[dest].bitfield.imm8)
5218     dest--;
5219
5220   /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg.  */
5221   if (i.tm.opcode_space == SPACE_BASE
5222       && ((base_opcode | 0x38) == 0x39
5223           || (base_opcode | 2) == 0x87))
5224     return 1;
5225
5226   if (i.tm.mnem_off == MN_xadd)
5227     return 1;
5228
5229   /* Check for load instruction.  */
5230   return (i.types[dest].bitfield.class != ClassNone
5231           || i.types[dest].bitfield.instance == Accum);
5232 }
5233
5234 /* Output lfence, 0xfaee8, after instruction.  */
5235
5236 static void
5237 insert_lfence_after (void)
5238 {
5239   if (lfence_after_load && load_insn_p ())
5240     {
5241       /* There are also two REP string instructions that require
5242          special treatment. Specifically, the compare string (CMPS)
5243          and scan string (SCAS) instructions set EFLAGS in a manner
5244          that depends on the data being compared/scanned. When used
5245          with a REP prefix, the number of iterations may therefore
5246          vary depending on this data. If the data is a program secret
5247          chosen by the adversary using an LVI method,
5248          then this data-dependent behavior may leak some aspect
5249          of the secret.  */
5250       if (((i.tm.base_opcode | 0x9) == 0xaf)
5251           && i.prefix[REP_PREFIX])
5252         {
5253             as_warn (_("`%s` changes flags which would affect control flow behavior"),
5254                      insn_name (&i.tm));
5255         }
5256       char *p = frag_more (3);
5257       *p++ = 0xf;
5258       *p++ = 0xae;
5259       *p = 0xe8;
5260     }
5261 }
5262
5263 /* Output lfence, 0xfaee8, before instruction.  */
5264
5265 static void
5266 insert_lfence_before (const struct last_insn *last_insn)
5267 {
5268   char *p;
5269
5270   if (i.tm.opcode_space != SPACE_BASE)
5271     return;
5272
5273   if (i.tm.base_opcode == 0xff
5274       && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
5275     {
5276       /* Insert lfence before indirect branch if needed.  */
5277
5278       if (lfence_before_indirect_branch == lfence_branch_none)
5279         return;
5280
5281       if (i.operands != 1)
5282         abort ();
5283
5284       if (i.reg_operands == 1)
5285         {
5286           /* Indirect branch via register.  Don't insert lfence with
5287              -mlfence-after-load=yes.  */
5288           if (lfence_after_load
5289               || lfence_before_indirect_branch == lfence_branch_memory)
5290             return;
5291         }
5292       else if (i.mem_operands == 1
5293                && lfence_before_indirect_branch != lfence_branch_register)
5294         {
5295           as_warn (_("indirect `%s` with memory operand should be avoided"),
5296                    insn_name (&i.tm));
5297           return;
5298         }
5299       else
5300         return;
5301
5302       if (last_insn->kind != last_insn_other)
5303         {
5304           as_warn_where (last_insn->file, last_insn->line,
5305                          _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
5306                          last_insn->name, insn_name (&i.tm));
5307           return;
5308         }
5309
5310       p = frag_more (3);
5311       *p++ = 0xf;
5312       *p++ = 0xae;
5313       *p = 0xe8;
5314       return;
5315     }
5316
5317   /* Output or/not/shl and lfence before near ret.  */
5318   if (lfence_before_ret != lfence_before_ret_none
5319       && (i.tm.base_opcode | 1) == 0xc3)
5320     {
5321       if (last_insn->kind != last_insn_other)
5322         {
5323           as_warn_where (last_insn->file, last_insn->line,
5324                          _("`%s` skips -mlfence-before-ret on `%s`"),
5325                          last_insn->name, insn_name (&i.tm));
5326           return;
5327         }
5328
5329       /* Near ret ingore operand size override under CPU64.  */
5330       char prefix = flag_code == CODE_64BIT
5331                     ? 0x48
5332                     : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
5333
5334       if (lfence_before_ret == lfence_before_ret_not)
5335         {
5336           /* not: 0xf71424, may add prefix
5337              for operand size override or 64-bit code.  */
5338           p = frag_more ((prefix ? 2 : 0) + 6 + 3);
5339           if (prefix)
5340             *p++ = prefix;
5341           *p++ = 0xf7;
5342           *p++ = 0x14;
5343           *p++ = 0x24;
5344           if (prefix)
5345             *p++ = prefix;
5346           *p++ = 0xf7;
5347           *p++ = 0x14;
5348           *p++ = 0x24;
5349         }
5350       else
5351         {
5352           p = frag_more ((prefix ? 1 : 0) + 4 + 3);
5353           if (prefix)
5354             *p++ = prefix;
5355           if (lfence_before_ret == lfence_before_ret_or)
5356             {
5357               /* or: 0x830c2400, may add prefix
5358                  for operand size override or 64-bit code.  */
5359               *p++ = 0x83;
5360               *p++ = 0x0c;
5361             }
5362           else
5363             {
5364               /* shl: 0xc1242400, may add prefix
5365                  for operand size override or 64-bit code.  */
5366               *p++ = 0xc1;
5367               *p++ = 0x24;
5368             }
5369
5370           *p++ = 0x24;
5371           *p++ = 0x0;
5372         }
5373
5374       *p++ = 0xf;
5375       *p++ = 0xae;
5376       *p = 0xe8;
5377     }
5378 }
5379
5380 /* Shared helper for md_assemble() and s_insn().  */
5381 static void init_globals (void)
5382 {
5383   unsigned int j;
5384
5385   memset (&i, '\0', sizeof (i));
5386   i.rounding.type = rc_none;
5387   for (j = 0; j < MAX_OPERANDS; j++)
5388     i.reloc[j] = NO_RELOC;
5389   memset (disp_expressions, '\0', sizeof (disp_expressions));
5390   memset (im_expressions, '\0', sizeof (im_expressions));
5391   save_stack_p = save_stack;
5392 }
5393
5394 /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
5395    parsing pass. Instead of introducing a rarely use new insn attribute this
5396    utilizes a common pattern between affected templates. It is deemed
5397    acceptable that this will lead to unnecessary pass 2 preparations in a
5398    limited set of cases.  */
5399 static INLINE bool may_need_pass2 (const insn_template *t)
5400 {
5401   return t->opcode_modifier.sse2avx
5402          /* Note that all SSE2AVX templates have at least one operand.  */
5403          ? t->operand_types[t->operands - 1].bitfield.class == RegSIMD
5404          : (t->opcode_space == SPACE_0F
5405             && (t->base_opcode | 1) == 0xbf)
5406            || (t->opcode_space == SPACE_BASE
5407                && t->base_opcode == 0x63);
5408 }
5409
5410 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
5411
5412 /* DWARF register number for EFLAGS.  Used for pushf/popf insns.  */
5413 #define GINSN_DW2_REGNUM_EFLAGS     49
5414 /* DWARF register number for RSI.  Used as dummy value when RegIP/RegIZ.  */
5415 #define GINSN_DW2_REGNUM_RSI_DUMMY  4
5416
5417 /* Identify the callee-saved registers in System V AMD64 ABI.  */
5418
5419 bool
5420 x86_scfi_callee_saved_p (unsigned int dw2reg_num)
5421 {
5422   if (dw2reg_num == 3 /* rbx.  */
5423       || dw2reg_num == REG_FP /* rbp.  */
5424       || dw2reg_num == REG_SP /* rsp.  */
5425       || (dw2reg_num >= 12 && dw2reg_num <= 15) /* r12 - r15.  */)
5426     return true;
5427
5428   return false;
5429 }
5430
5431 /* Check whether an instruction prefix which affects operation size
5432    accompanies.  For insns in the legacy space, setting REX.W takes precedence
5433    over the operand-size prefix (66H) when both are used.
5434
5435    The current users of this API are in the handlers for PUSH, POP or other
5436    instructions which affect the stack pointer implicitly:  the operation size
5437    (16, 32, or 64 bits) determines the amount by which the stack pointer is
5438    incremented / decremented (2, 4 or 8).  */
5439
5440 static bool
5441 ginsn_opsize_prefix_p (void)
5442 {
5443   return (!(i.prefix[REX_PREFIX] & REX_W) && i.prefix[DATA_PREFIX]);
5444 }
5445
5446 /* Get the DWARF register number for the given register entry.
5447    For specific byte/word/dword register accesses like al, cl, ah, ch, r8d,
5448    r20w etc., we need to identify the DWARF register number for the
5449    corresponding 8-byte GPR.
5450
5451    This function is a hack - it relies on relative ordering of reg entries in
5452    the i386_regtab.  FIXME - it will be good to allow a more direct way to get
5453    this information.  */
5454
5455 static unsigned int
5456 ginsn_dw2_regnum (const reg_entry *ireg)
5457 {
5458   const reg_entry *temp = ireg;
5459   unsigned int dwarf_reg = Dw2Inval, idx = 0;
5460
5461   /* ginsn creation is available for AMD64 abi only ATM.  Other flag_code
5462      are not expected.  */
5463   gas_assert (ireg && flag_code == CODE_64BIT);
5464
5465   /* Watch out for RegIP, RegIZ.  These are expected to appear only with
5466      base/index addressing modes.  Although creating inaccurate data
5467      dependencies, using a dummy value (lets say volatile register rsi) will
5468      not hurt SCFI.  TBD_GINSN_GEN_NOT_SCFI.  */
5469   if (ireg->reg_num == RegIP || ireg->reg_num == RegIZ)
5470     return GINSN_DW2_REGNUM_RSI_DUMMY;
5471
5472   dwarf_reg = ireg->dw2_regnum[object_64bit];
5473
5474   if (dwarf_reg == Dw2Inval)
5475     {
5476       if (ireg <= &i386_regtab[3])
5477         /* For al, cl, dl, bl, bump over to axl, cxl, dxl, bxl respectively by
5478            adding 8.  */
5479         temp = ireg + 8;
5480       else if (ireg <= &i386_regtab[7])
5481         /* For ah, ch, dh, bh, bump over to axl, cxl, dxl, bxl respectively by
5482            adding 4.  */
5483         temp = ireg + 4;
5484       else
5485         {
5486           /* The code relies on the relative ordering of the reg entries in
5487              i386_regtab.  There are 32 register entries between axl-r31b,
5488              ax-r31w etc.  The assertions here ensures the code does not
5489              recurse indefinitely.  */
5490           gas_assert ((temp - &i386_regtab[0]) >= 0);
5491           idx = temp - &i386_regtab[0];
5492           gas_assert (idx + 32 < i386_regtab_size - 1);
5493
5494           temp = temp + 32;
5495         }
5496
5497       dwarf_reg = ginsn_dw2_regnum (temp);
5498     }
5499
5500   /* Sanity check - failure may indicate state corruption, bad ginsn or
5501      perhaps the i386-reg table and the current function got out of sync.  */
5502   gas_assert (dwarf_reg < Dw2Inval);
5503
5504   return dwarf_reg;
5505 }
5506
5507 static ginsnS *
5508 x86_ginsn_addsub_reg_mem (const symbolS *insn_end_sym)
5509 {
5510   unsigned int dw2_regnum;
5511   unsigned int src1_dw2_regnum;
5512   ginsnS *ginsn = NULL;
5513   ginsnS * (*ginsn_func) (const symbolS *, bool,
5514                           enum ginsn_src_type, unsigned int, offsetT,
5515                           enum ginsn_src_type, unsigned int, offsetT,
5516                           enum ginsn_dst_type, unsigned int, offsetT);
5517   uint16_t opcode = i.tm.base_opcode;
5518
5519   gas_assert (i.tm.opcode_space == SPACE_BASE
5520               && (opcode == 0x1 || opcode == 0x29));
5521   ginsn_func = (opcode == 0x1) ? ginsn_new_add : ginsn_new_sub;
5522
5523   /* op %reg, symbol or even other cases where destination involves indirect
5524      access are unnecessary for SCFI correctness.  TBD_GINSN_GEN_NOT_SCFI.  */
5525   if (i.mem_operands)
5526     return ginsn;
5527
5528   /* Skip detection of 8/16/32-bit op size; 'add/sub reg, reg/mem' ops always
5529      make the dest reg untraceable for SCFI.  */
5530
5531   /* op reg, reg/mem.  */
5532   src1_dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
5533   /* Of interest only when second opnd is not memory.  */
5534   if (i.reg_operands == 2)
5535     {
5536       dw2_regnum = ginsn_dw2_regnum (i.op[1].regs);
5537       ginsn = ginsn_func (insn_end_sym, true,
5538                           GINSN_SRC_REG, src1_dw2_regnum, 0,
5539                           GINSN_SRC_REG, dw2_regnum, 0,
5540                           GINSN_DST_REG, dw2_regnum, 0);
5541       ginsn_set_where (ginsn);
5542     }
5543
5544   return ginsn;
5545 }
5546
5547 static ginsnS *
5548 x86_ginsn_addsub_mem_reg (const symbolS *insn_end_sym)
5549 {
5550   unsigned int dw2_regnum;
5551   unsigned int src1_dw2_regnum;
5552   const reg_entry *mem_reg;
5553   int32_t gdisp = 0;
5554   ginsnS *ginsn = NULL;
5555   ginsnS * (*ginsn_func) (const symbolS *, bool,
5556                           enum ginsn_src_type, unsigned int, offsetT,
5557                           enum ginsn_src_type, unsigned int, offsetT,
5558                           enum ginsn_dst_type, unsigned int, offsetT);
5559   uint16_t opcode = i.tm.base_opcode;
5560
5561   gas_assert (i.tm.opcode_space == SPACE_BASE
5562               && (opcode == 0x3 || opcode == 0x2b));
5563   ginsn_func = (opcode == 0x3) ? ginsn_new_add : ginsn_new_sub;
5564
5565   /* op symbol, %reg.  */
5566   if (i.mem_operands && !i.base_reg && !i.index_reg)
5567     return ginsn;
5568
5569   /* Skip detection of 8/16/32-bit op size; 'add/sub reg/mem, reg' ops always
5570      make the dest reg untraceable for SCFI.  */
5571
5572   /* op reg/mem, %reg.  */
5573   dw2_regnum = ginsn_dw2_regnum (i.op[1].regs);
5574
5575   if (i.reg_operands == 2)
5576     {
5577       src1_dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
5578       ginsn = ginsn_func (insn_end_sym, true,
5579                           GINSN_SRC_REG, src1_dw2_regnum, 0,
5580                           GINSN_SRC_REG, dw2_regnum, 0,
5581                           GINSN_DST_REG, dw2_regnum, 0);
5582       ginsn_set_where (ginsn);
5583     }
5584   else if (i.mem_operands)
5585     {
5586       mem_reg = (i.base_reg) ? i.base_reg : i.index_reg;
5587       src1_dw2_regnum = ginsn_dw2_regnum (mem_reg);
5588       if (i.disp_operands == 1)
5589         gdisp = i.op[0].disps->X_add_number;
5590       ginsn = ginsn_func (insn_end_sym, true,
5591                           GINSN_SRC_INDIRECT, src1_dw2_regnum, gdisp,
5592                           GINSN_SRC_REG, dw2_regnum, 0,
5593                           GINSN_DST_REG, dw2_regnum, 0);
5594       ginsn_set_where (ginsn);
5595     }
5596
5597   return ginsn;
5598 }
5599
5600 static ginsnS *
5601 x86_ginsn_alu_imm (const symbolS *insn_end_sym)
5602 {
5603   offsetT src_imm;
5604   unsigned int dw2_regnum;
5605   ginsnS *ginsn = NULL;
5606   enum ginsn_src_type src_type = GINSN_SRC_REG;
5607   enum ginsn_dst_type dst_type = GINSN_DST_REG;
5608
5609   ginsnS * (*ginsn_func) (const symbolS *, bool,
5610                           enum ginsn_src_type, unsigned int, offsetT,
5611                           enum ginsn_src_type, unsigned int, offsetT,
5612                           enum ginsn_dst_type, unsigned int, offsetT);
5613
5614   /* FIXME - create ginsn where dest is REG_SP / REG_FP only ? */
5615   /* Map for insn.tm.extension_opcode
5616      000 ADD    100 AND
5617      001 OR     101 SUB
5618      010 ADC    110 XOR
5619      011 SBB    111 CMP  */
5620
5621   /* add/sub/and imm, %reg only at this time for SCFI.
5622      Although all three ('and', 'or' , 'xor') make the destination reg
5623      untraceable, 'and' op is handled but not 'or' / 'xor' because we will look
5624      into supporting the DRAP pattern at some point.  Other opcodes ('adc',
5625      'sbb' and 'cmp') are not generated here either.  The ginsn representation
5626      does not have support for the latter three opcodes;  GINSN_TYPE_OTHER may
5627      be added for these after x86_ginsn_unhandled () invocation if the
5628      destination register is REG_SP or REG_FP.  */
5629   if (i.tm.extension_opcode == 5)
5630     ginsn_func = ginsn_new_sub;
5631   else if (i.tm.extension_opcode == 4)
5632     ginsn_func = ginsn_new_and;
5633   else if (i.tm.extension_opcode == 0)
5634     ginsn_func = ginsn_new_add;
5635   else
5636     return ginsn;
5637
5638   /* TBD_GINSN_REPRESENTATION_LIMIT: There is no representation for when a
5639      symbol is used as an operand, like so:
5640           addq    $simd_cmp_op+8, %rdx
5641      Skip generating any ginsn for this.  */
5642   if (i.imm_operands == 1
5643       && i.op[0].imms->X_op != O_constant)
5644     return ginsn;
5645
5646   /* addq    $1, symbol
5647      addq    $1, -16(%rbp)
5648      These are not of interest for SCFI.  Also, TBD_GINSN_GEN_NOT_SCFI.  */
5649   if (i.mem_operands == 1)
5650     return ginsn;
5651
5652   /* 8/16/32-bit op size makes the destination reg untraceable for SCFI.
5653      Deal with this via the x86_ginsn_unhandled () code path.  */
5654   if (i.suffix != QWORD_MNEM_SUFFIX)
5655     return ginsn;
5656
5657   gas_assert (i.imm_operands == 1);
5658   src_imm = i.op[0].imms->X_add_number;
5659   /* The second operand may be a register or indirect access.  For SCFI, only
5660      the case when the second opnd is a register is interesting.  Revisit this
5661      if generating ginsns for a different gen mode TBD_GINSN_GEN_NOT_SCFI.  */
5662   if (i.reg_operands == 1)
5663     {
5664       dw2_regnum = ginsn_dw2_regnum (i.op[1].regs);
5665       /* For ginsn, keep the imm as second src operand.  */
5666       ginsn = ginsn_func (insn_end_sym, true,
5667                           src_type, dw2_regnum, 0,
5668                           GINSN_SRC_IMM, 0, src_imm,
5669                           dst_type, dw2_regnum, 0);
5670
5671       ginsn_set_where (ginsn);
5672     }
5673
5674   return ginsn;
5675 }
5676
5677 /* Create ginsn(s) for MOV operations.
5678
5679    The generated ginsns corresponding to mov with indirect access to memory
5680    (src or dest) suffer with loss of information: when both index and base
5681    registers are at play, only base register gets conveyed in ginsn.  Note
5682    this TBD_GINSN_GEN_NOT_SCFI.  */
5683
5684 static ginsnS *
5685 x86_ginsn_move (const symbolS *insn_end_sym)
5686 {
5687   ginsnS *ginsn = NULL;
5688   unsigned int dst_reg;
5689   unsigned int src_reg;
5690   offsetT src_disp = 0;
5691   offsetT dst_disp = 0;
5692   const reg_entry *dst = NULL;
5693   const reg_entry *src = NULL;
5694   uint16_t opcode = i.tm.base_opcode;
5695   enum ginsn_src_type src_type = GINSN_SRC_REG;
5696   enum ginsn_dst_type dst_type = GINSN_DST_REG;
5697
5698   /* mov %reg, symbol or mov symbol, %reg.
5699      Not of interest for SCFI.  Also, TBD_GINSN_GEN_NOT_SCFI.  */
5700   if (i.mem_operands == 1 && !i.base_reg && !i.index_reg)
5701     return ginsn;
5702
5703   /* 8/16/32-bit op size makes the destination reg untraceable for SCFI.
5704      Handle mov reg, reg only.  mov to or from a memory operand will make
5705      dest reg, when present, untraceable, irrespective of the op size.  */
5706   if (i.reg_operands == 2 && i.suffix != QWORD_MNEM_SUFFIX)
5707     return ginsn;
5708
5709   gas_assert (i.tm.opcode_space == SPACE_BASE);
5710   if (opcode == 0x8b || opcode == 0x8a)
5711     {
5712       /* mov  disp(%reg), %reg.  */
5713       if (i.mem_operands)
5714         {
5715           src = (i.base_reg) ? i.base_reg : i.index_reg;
5716           if (i.disp_operands == 1)
5717             src_disp = i.op[0].disps->X_add_number;
5718           src_type = GINSN_SRC_INDIRECT;
5719         }
5720       else
5721         src = i.op[0].regs;
5722
5723       dst = i.op[1].regs;
5724     }
5725   else if (opcode == 0x89 || opcode == 0x88)
5726     {
5727       /* mov %reg, disp(%reg).  */
5728       src = i.op[0].regs;
5729       if (i.mem_operands)
5730         {
5731           dst = (i.base_reg) ? i.base_reg : i.index_reg;
5732           if (i.disp_operands == 1)
5733             dst_disp = i.op[1].disps->X_add_number;
5734           dst_type = GINSN_DST_INDIRECT;
5735         }
5736       else
5737         dst = i.op[1].regs;
5738     }
5739
5740   src_reg = ginsn_dw2_regnum (src);
5741   dst_reg = ginsn_dw2_regnum (dst);
5742
5743   ginsn = ginsn_new_mov (insn_end_sym, true,
5744                          src_type, src_reg, src_disp,
5745                          dst_type, dst_reg, dst_disp);
5746   ginsn_set_where (ginsn);
5747
5748   return ginsn;
5749 }
5750
5751 /* Generate appropriate ginsn for lea.
5752
5753    Unhandled sub-cases (marked with TBD_GINSN_GEN_NOT_SCFI) also suffer with
5754    some loss of information in the final ginsn chosen eventually (type
5755    GINSN_TYPE_OTHER).  But this is fine for now for GINSN_GEN_SCFI generation
5756    mode.  */
5757
5758 static ginsnS *
5759 x86_ginsn_lea (const symbolS *insn_end_sym)
5760 {
5761   offsetT src_disp = 0;
5762   ginsnS *ginsn = NULL;
5763   unsigned int src1_reg;
5764   const reg_entry *src1;
5765   offsetT index_scale;
5766   unsigned int dst_reg;
5767   bool index_regiz_p;
5768
5769   if ((!i.base_reg) != (!i.index_reg || i.index_reg->reg_num == RegIZ))
5770     {
5771       /* lea disp(%base), %dst    or    lea disp(,%index,imm), %dst.
5772          Either index_reg or base_reg exists, but not both.  Further, as per
5773          above, the case when just %index exists but is equal to RegIZ is
5774          excluded.  If not excluded, a GINSN_TYPE_MOV of %rsi
5775          (GINSN_DW2_REGNUM_RSI_DUMMY) to %dst will be generated by this block.
5776          Such a mov ginsn is imprecise; so, exclude now and generate
5777          GINSN_TYPE_OTHER instead later via the x86_ginsn_unhandled ().
5778          Excluding other cases is required due to
5779          TBD_GINSN_REPRESENTATION_LIMIT.  */
5780
5781       index_scale = i.log2_scale_factor;
5782       index_regiz_p = i.index_reg && i.index_reg->reg_num == RegIZ;
5783       src1 = i.base_reg ? i.base_reg : i.index_reg;
5784       src1_reg = ginsn_dw2_regnum (src1);
5785       dst_reg = ginsn_dw2_regnum (i.op[1].regs);
5786       /* It makes sense to represent a scale factor of 1 precisely here
5787          (i.e., not using GINSN_TYPE_OTHER, but rather similar to the
5788          base-without-index case).  A non-zero scale factor is still OK if
5789          the index reg is zero reg.
5790          However, skip from here the case when disp has a symbol instead.
5791          TBD_GINSN_REPRESENTATION_LIMIT.  */
5792       if ((!index_scale || index_regiz_p)
5793           && (!i.disp_operands || i.op[0].disps->X_op == O_constant))
5794         {
5795           if (i.disp_operands)
5796             src_disp = i.op[0].disps->X_add_number;
5797
5798           if (src_disp)
5799             /* Generate an ADD ginsn.  */
5800             ginsn = ginsn_new_add (insn_end_sym, true,
5801                                    GINSN_SRC_REG, src1_reg, 0,
5802                                    GINSN_SRC_IMM, 0, src_disp,
5803                                    GINSN_DST_REG, dst_reg, 0);
5804           else
5805             /* Generate a MOV ginsn.  */
5806             ginsn = ginsn_new_mov (insn_end_sym, true,
5807                                    GINSN_SRC_REG, src1_reg, 0,
5808                                    GINSN_DST_REG, dst_reg, 0);
5809
5810           ginsn_set_where (ginsn);
5811         }
5812     }
5813   /* Skip handling other cases here,
5814      - when (i.index_reg && i.base_reg) is true,
5815        e.g., lea disp(%base,%index,imm), %dst
5816        We do not have a ginsn representation for multiply.
5817      - or, when (!i.index_reg && !i.base_reg) is true,
5818        e.g., lea symbol, %dst
5819        Not a frequent pattern.  If %dst is a register of interest, the user is
5820        likely to use a MOV op anyway.
5821      Deal with these via the x86_ginsn_unhandled () code path to generate
5822      GINSN_TYPE_OTHER when necessary.  TBD_GINSN_GEN_NOT_SCFI.  */
5823
5824   return ginsn;
5825 }
5826
5827 static ginsnS *
5828 x86_ginsn_jump (const symbolS *insn_end_sym, bool cond_p)
5829 {
5830   ginsnS *ginsn = NULL;
5831   const symbolS *src_symbol;
5832   ginsnS * (*ginsn_func) (const symbolS *sym, bool real_p,
5833                           enum ginsn_src_type src_type, unsigned int src_reg,
5834                           const symbolS *src_ginsn_sym);
5835
5836   gas_assert (i.disp_operands == 1);
5837
5838   ginsn_func = cond_p ? ginsn_new_jump_cond : ginsn_new_jump;
5839   if (i.op[0].disps->X_op == O_symbol && !i.op[0].disps->X_add_number)
5840     {
5841       src_symbol = i.op[0].disps->X_add_symbol;
5842       ginsn = ginsn_func (insn_end_sym, true,
5843                           GINSN_SRC_SYMBOL, 0, src_symbol);
5844
5845       ginsn_set_where (ginsn);
5846     }
5847   else
5848     {
5849       /* A non-zero addend in jump/JCC target makes control-flow tracking
5850          difficult.  Skip SCFI for now.  */
5851       as_bad (_("SCFI: `%s' insn with non-zero addend to sym not supported"),
5852               cond_p ? "JCC" : "jmp");
5853       return ginsn;
5854     }
5855
5856   return ginsn;
5857 }
5858
5859 static ginsnS *
5860 x86_ginsn_enter (const symbolS *insn_end_sym)
5861 {
5862   ginsnS *ginsn = NULL;
5863   ginsnS *ginsn_next = NULL;
5864   ginsnS *ginsn_last = NULL;
5865   /* In 64-bit mode, the default stack update size is 8 bytes.  */
5866   int stack_opnd_size = 8;
5867
5868   gas_assert (i.imm_operands == 2);
5869
5870   /* For non-zero size operands, bail out as untraceable for SCFI.  */
5871   if (i.op[0].imms->X_op != O_constant || i.op[0].imms->X_add_symbol != 0
5872       || i.op[1].imms->X_op != O_constant || i.op[1].imms->X_add_symbol != 0)
5873     {
5874       as_bad ("SCFI: enter insn with non-zero operand not supported");
5875       return ginsn;
5876     }
5877
5878   /* Check if this is a 16-bit op.  */
5879   if (ginsn_opsize_prefix_p ())
5880     stack_opnd_size = 2;
5881
5882   /* If the nesting level is 0, the processor pushes the frame pointer from
5883      the BP/EBP/RBP register onto the stack, copies the current stack
5884      pointer from the SP/ESP/RSP register into the BP/EBP/RBP register, and
5885      loads the SP/ESP/RSP register with the current stack-pointer value
5886      minus the value in the size operand.  */
5887   ginsn = ginsn_new_sub (insn_end_sym, false,
5888                          GINSN_SRC_REG, REG_SP, 0,
5889                          GINSN_SRC_IMM, 0, stack_opnd_size,
5890                          GINSN_DST_REG, REG_SP, 0);
5891   ginsn_set_where (ginsn);
5892   ginsn_next = ginsn_new_store (insn_end_sym, false,
5893                                 GINSN_SRC_REG, REG_FP,
5894                                 GINSN_DST_INDIRECT, REG_SP, 0);
5895   ginsn_set_where (ginsn_next);
5896   gas_assert (!ginsn_link_next (ginsn, ginsn_next));
5897   ginsn_last = ginsn_new_mov (insn_end_sym, false,
5898                               GINSN_SRC_REG, REG_SP, 0,
5899                               GINSN_DST_REG, REG_FP, 0);
5900   ginsn_set_where (ginsn_last);
5901   gas_assert (!ginsn_link_next (ginsn_next, ginsn_last));
5902
5903   return ginsn;
5904 }
5905
5906 static ginsnS *
5907 x86_ginsn_leave (const symbolS *insn_end_sym)
5908 {
5909   ginsnS *ginsn = NULL;
5910   ginsnS *ginsn_next = NULL;
5911   ginsnS *ginsn_last = NULL;
5912   /* In 64-bit mode, the default stack update size is 8 bytes.  */
5913   int stack_opnd_size = 8;
5914
5915   /* Check if this is a 16-bit op.  */
5916   if (ginsn_opsize_prefix_p ())
5917     stack_opnd_size = 2;
5918
5919   /* The 'leave' instruction copies the contents of the RBP register
5920      into the RSP register to release all stack space allocated to the
5921      procedure.  */
5922   ginsn = ginsn_new_mov (insn_end_sym, false,
5923                          GINSN_SRC_REG, REG_FP, 0,
5924                          GINSN_DST_REG, REG_SP, 0);
5925   ginsn_set_where (ginsn);
5926   /* Then it restores the old value of the RBP register from the stack.  */
5927   ginsn_next = ginsn_new_load (insn_end_sym, false,
5928                                GINSN_SRC_INDIRECT, REG_SP, 0,
5929                                GINSN_DST_REG, REG_FP);
5930   ginsn_set_where (ginsn_next);
5931   gas_assert (!ginsn_link_next (ginsn, ginsn_next));
5932   ginsn_last = ginsn_new_add (insn_end_sym, false,
5933                               GINSN_SRC_REG, REG_SP, 0,
5934                               GINSN_SRC_IMM, 0, stack_opnd_size,
5935                               GINSN_DST_REG, REG_SP, 0);
5936   ginsn_set_where (ginsn_next);
5937   gas_assert (!ginsn_link_next (ginsn_next, ginsn_last));
5938
5939   return ginsn;
5940 }
5941
5942 /* Check if an instruction is whitelisted.
5943
5944    Some instructions may appear with REG_SP or REG_FP as destination, because
5945    which they are deemed 'interesting' for SCFI.  Whitelist them here if they
5946    do not affect SCFI correctness.  */
5947
5948 static bool
5949 x86_ginsn_safe_to_skip_p (void)
5950 {
5951   bool skip_p = false;
5952   uint16_t opcode = i.tm.base_opcode;
5953
5954   switch (opcode)
5955     {
5956     case 0x80:
5957     case 0x81:
5958     case 0x83:
5959       if (i.tm.opcode_space != SPACE_BASE)
5960         break;
5961       /* cmp imm, reg/rem.  */
5962       if (i.tm.extension_opcode == 7)
5963         skip_p = true;
5964       break;
5965
5966     case 0x38:
5967     case 0x39:
5968     case 0x3a:
5969     case 0x3b:
5970       if (i.tm.opcode_space != SPACE_BASE)
5971         break;
5972       /* cmp imm/reg/mem, reg/rem.  */
5973       skip_p = true;
5974       break;
5975
5976     case 0xf6:
5977     case 0xf7:
5978     case 0x84:
5979     case 0x85:
5980       /* test imm/reg/mem, reg/mem.  */
5981       if (i.tm.opcode_space != SPACE_BASE)
5982         break;
5983       skip_p = true;
5984       break;
5985
5986     default:
5987       break;
5988     }
5989
5990   return skip_p;
5991 }
5992
/* Codes returned by x86_ginsn_unhandled ().  */
/* None of the conditions below was detected.  */
#define X86_GINSN_UNHANDLED_NONE        0
/* The last operand is a register which maps to REG_SP or REG_FP.  */
#define X86_GINSN_UNHANDLED_DEST_REG    1
/* The insn template has its jump modifier set.  */
#define X86_GINSN_UNHANDLED_CFG         2
/* The insn template carries the IMPLICIT_STACK_OP operand constraint.  */
#define X86_GINSN_UNHANDLED_STACKOP     3
/* The last operand is not a memory operand, yet has no register entry.  */
#define X86_GINSN_UNHANDLED_UNEXPECTED  4
5998
5999 /* Check the input insn for its impact on the correctness of the synthesized
6000    CFI.  Returns an error code to the caller.  */
6001
6002 static int
6003 x86_ginsn_unhandled (void)
6004 {
6005   int err = X86_GINSN_UNHANDLED_NONE;
6006   const reg_entry *reg_op;
6007   unsigned int dw2_regnum;
6008
6009   /* Keep an eye out for instructions affecting control flow.  */
6010   if (i.tm.opcode_modifier.jump)
6011     err = X86_GINSN_UNHANDLED_CFG;
6012   /* Also, for any instructions involving an implicit update to the stack
6013      pointer.  */
6014   else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_STACK_OP)
6015     err = X86_GINSN_UNHANDLED_STACKOP;
6016   /* Finally, also check if the missed instructions are affecting REG_SP or
6017      REG_FP.  The destination operand is the last at all stages of assembly
6018      (due to following AT&T syntax layout in the internal representation).  In
6019      case of Intel syntax input, this still remains true as swap_operands ()
6020      is done by now.
6021      PS: These checks do not involve index / base reg, as indirect memory
6022      accesses via REG_SP or REG_FP do not affect SCFI correctness.
6023      (Also note these instructions are candidates for other ginsn generation
6024      modes in future.  TBD_GINSN_GEN_NOT_SCFI.)  */
6025   else if (i.operands && i.reg_operands
6026            && !(i.flags[i.operands - 1] & Operand_Mem))
6027     {
6028       reg_op = i.op[i.operands - 1].regs;
6029       if (reg_op)
6030         {
6031           dw2_regnum = ginsn_dw2_regnum (reg_op);
6032           if (dw2_regnum == REG_SP || dw2_regnum == REG_FP)
6033             err = X86_GINSN_UNHANDLED_DEST_REG;
6034         }
6035       else
6036         /* Something unexpected.  Indicate to caller.  */
6037         err = X86_GINSN_UNHANDLED_UNEXPECTED;
6038     }
6039
6040   return err;
6041 }
6042
6043 /* Generate one or more generic GAS instructions, a.k.a, ginsns for the current
6044    machine instruction.
6045
6046    Returns the head of linked list of ginsn(s) added, if success; Returns NULL
6047    if failure.
6048
   The input ginsn_gen_mode GMODE determines the minimal set of ginsns
   necessary for correctness of any passes applicable for that mode.
6051    For supporting the GINSN_GEN_SCFI generation mode, following is the list of
6052    machine instructions that must be translated into the corresponding ginsns
6053    to ensure correctness of SCFI:
6054      - All instructions affecting the two registers that could potentially
6055        be used as the base register for CFA tracking.  For SCFI, the base
6056        register for CFA tracking is limited to REG_SP and REG_FP only for
6057        now.
6058      - All change of flow instructions: conditional and unconditional branches,
6059        call and return from functions.
6060      - All instructions that can potentially be a register save / restore
6061        operation.
6062      - All instructions that perform stack manipulation implicitly: the CALL,
6063        RET, PUSH, POP, ENTER, and LEAVE instructions.
6064
6065    The function currently supports GINSN_GEN_SCFI ginsn generation mode only.
6066    To support other generation modes will require work on this target-specific
6067    process of creation of ginsns:
6068      - Some of such places are tagged with TBD_GINSN_GEN_NOT_SCFI to serve as
6069        possible starting points.
6070      - Also note that ginsn representation may need enhancements.  Specifically,
6071        note some TBD_GINSN_INFO_LOSS and TBD_GINSN_REPRESENTATION_LIMIT markers.
6072    */
6073
6074 static ginsnS *
6075 x86_ginsn_new (const symbolS *insn_end_sym, enum ginsn_gen_mode gmode)
6076 {
6077   int err = 0;
6078   uint16_t opcode;
6079   unsigned int dw2_regnum;
6080   const reg_entry *mem_reg;
6081   ginsnS *ginsn = NULL;
6082   ginsnS *ginsn_next = NULL;
6083   /* In 64-bit mode, the default stack update size is 8 bytes.  */
6084   int stack_opnd_size = 8;
6085
6086   /* Currently supports generation of selected ginsns, sufficient for
6087      the use-case of SCFI only.  */
6088   if (gmode != GINSN_GEN_SCFI)
6089     return ginsn;
6090
6091   opcode = i.tm.base_opcode;
6092
6093   /* Until it is clear how to handle APX NDD and other new opcodes, disallow
6094      them from SCFI.  */
6095   if (is_apx_rex2_encoding ()
6096       || (i.tm.opcode_modifier.evex && is_apx_evex_encoding ()))
6097     {
6098       as_bad (_("SCFI: unsupported APX op %#x may cause incorrect CFI"),
6099               opcode);
6100       return ginsn;
6101     }
6102
6103   switch (opcode)
6104     {
6105
6106     /* Add opcodes 0x0/0x2 and sub opcodes 0x28/0x2a (with opcode_space
6107        SPACE_BASE) are 8-bit ops.  While they are relevant for SCFI
6108        correctness,  skip handling them here and use the x86_ginsn_unhandled
6109        code path to generate GINSN_TYPE_OTHER when necessary.  */
6110
6111     case 0x1:  /* add reg, reg/mem.  */
6112     case 0x29: /* sub reg, reg/mem.  */
6113       if (i.tm.opcode_space != SPACE_BASE)
6114         break;
6115       ginsn = x86_ginsn_addsub_reg_mem (insn_end_sym);
6116       break;
6117
6118     case 0x3:  /* add reg/mem, reg.  */
6119     case 0x2b: /* sub reg/mem, reg.  */
6120       if (i.tm.opcode_space != SPACE_BASE)
6121         break;
6122       ginsn = x86_ginsn_addsub_mem_reg (insn_end_sym);
6123       break;
6124
6125     case 0xa0: /* push fs.  */
6126     case 0xa8: /* push gs.  */
6127       /* push fs / push gs have opcode_space == SPACE_0F.  */
6128       if (i.tm.opcode_space != SPACE_0F)
6129         break;
6130       dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6131       /* Check if operation size is 16-bit.  */
6132       if (ginsn_opsize_prefix_p ())
6133         stack_opnd_size = 2;
6134       ginsn = ginsn_new_sub (insn_end_sym, false,
6135                              GINSN_SRC_REG, REG_SP, 0,
6136                              GINSN_SRC_IMM, 0, stack_opnd_size,
6137                              GINSN_DST_REG, REG_SP, 0);
6138       ginsn_set_where (ginsn);
6139       ginsn_next = ginsn_new_store (insn_end_sym, false,
6140                                     GINSN_SRC_REG, dw2_regnum,
6141                                     GINSN_DST_INDIRECT, REG_SP, 0);
6142       ginsn_set_where (ginsn_next);
6143       gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6144       break;
6145
6146     case 0xa1: /* pop fs.  */
6147     case 0xa9: /* pop gs.  */
6148       /* pop fs / pop gs have opcode_space == SPACE_0F.  */
6149       if (i.tm.opcode_space != SPACE_0F)
6150         break;
6151       dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6152       /* Check if operation size is 16-bit.  */
6153       if (ginsn_opsize_prefix_p ())
6154         stack_opnd_size = 2;
6155       ginsn = ginsn_new_load (insn_end_sym, false,
6156                               GINSN_SRC_INDIRECT, REG_SP, 0,
6157                               GINSN_DST_REG, dw2_regnum);
6158       ginsn_set_where (ginsn);
6159       ginsn_next = ginsn_new_add (insn_end_sym, false,
6160                                   GINSN_SRC_REG, REG_SP, 0,
6161                                   GINSN_SRC_IMM, 0, stack_opnd_size,
6162                                   GINSN_DST_REG, REG_SP, 0);
6163       ginsn_set_where (ginsn_next);
6164       gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6165       break;
6166
6167     case 0x50 ... 0x57:
6168       if (i.tm.opcode_space != SPACE_BASE)
6169         break;
6170       /* push reg.  */
6171       dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6172       /* Check if operation size is 16-bit.  */
6173       if (ginsn_opsize_prefix_p ())
6174         stack_opnd_size = 2;
6175       ginsn = ginsn_new_sub (insn_end_sym, false,
6176                              GINSN_SRC_REG, REG_SP, 0,
6177                              GINSN_SRC_IMM, 0, stack_opnd_size,
6178                              GINSN_DST_REG, REG_SP, 0);
6179       ginsn_set_where (ginsn);
6180       ginsn_next = ginsn_new_store (insn_end_sym, false,
6181                                     GINSN_SRC_REG, dw2_regnum,
6182                                     GINSN_DST_INDIRECT, REG_SP, 0);
6183       ginsn_set_where (ginsn_next);
6184       gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6185       break;
6186
6187     case 0x58 ... 0x5f:
6188       if (i.tm.opcode_space != SPACE_BASE)
6189         break;
6190       /* pop reg.  */
6191       dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6192       ginsn = ginsn_new_load (insn_end_sym, false,
6193                               GINSN_SRC_INDIRECT, REG_SP, 0,
6194                               GINSN_DST_REG, dw2_regnum);
6195       ginsn_set_where (ginsn);
6196       /* Check if operation size is 16-bit.  */
6197       if (ginsn_opsize_prefix_p ())
6198         stack_opnd_size = 2;
6199       ginsn_next = ginsn_new_add (insn_end_sym, false,
6200                                   GINSN_SRC_REG, REG_SP, 0,
6201                                   GINSN_SRC_IMM, 0, stack_opnd_size,
6202                                   GINSN_DST_REG, REG_SP, 0);
6203       ginsn_set_where (ginsn_next);
6204       gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6205       break;
6206
6207     case 0x6a: /* push imm8.  */
6208     case 0x68: /* push imm16/imm32.  */
6209       if (i.tm.opcode_space != SPACE_BASE)
6210         break;
6211       /* Check if operation size is 16-bit.  */
6212       if (ginsn_opsize_prefix_p ())
6213         stack_opnd_size = 2;
6214       /* Skip getting the value of imm from machine instruction
6215          because this is not important for SCFI.  */
6216       ginsn = ginsn_new_sub (insn_end_sym, false,
6217                              GINSN_SRC_REG, REG_SP, 0,
6218                              GINSN_SRC_IMM, 0, stack_opnd_size,
6219                              GINSN_DST_REG, REG_SP, 0);
6220       ginsn_set_where (ginsn);
6221       ginsn_next = ginsn_new_store (insn_end_sym, false,
6222                                     GINSN_SRC_IMM, 0,
6223                                     GINSN_DST_INDIRECT, REG_SP, 0);
6224       ginsn_set_where (ginsn_next);
6225       gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6226       break;
6227
6228     /* PS: Opcodes 0x80 ... 0x8f with opcode_space SPACE_0F are present
6229        only after relaxation.  They do not need to be handled for ginsn
6230        creation.  */
6231     case 0x70 ... 0x7f:
6232       if (i.tm.opcode_space != SPACE_BASE)
6233         break;
6234       ginsn = x86_ginsn_jump (insn_end_sym, true);
6235       break;
6236
6237     case 0x80:
6238     case 0x81:
6239     case 0x83:
6240       if (i.tm.opcode_space != SPACE_BASE)
6241         break;
6242       ginsn = x86_ginsn_alu_imm (insn_end_sym);
6243       break;
6244
6245     case 0x8a: /* mov r/m8, r8.  */
6246     case 0x8b: /* mov r/m(16/32/64), r(16/32/64).  */
6247     case 0x88: /* mov r8, r/m8.  */
6248     case 0x89: /* mov r(16/32/64), r/m(16/32/64).  */
6249       if (i.tm.opcode_space != SPACE_BASE)
6250         break;
6251       ginsn = x86_ginsn_move (insn_end_sym);
6252       break;
6253
6254     case 0x8d:
6255       if (i.tm.opcode_space != SPACE_BASE)
6256         break;
6257       /* lea disp(%base,%index,imm), %dst.  */
6258       ginsn = x86_ginsn_lea (insn_end_sym);
6259       break;
6260
6261     case 0x8f:
6262       if (i.tm.opcode_space != SPACE_BASE)
6263         break;
6264       /* pop to reg/mem.  */
6265       if (i.mem_operands)
6266         {
6267           mem_reg = (i.base_reg) ? i.base_reg : i.index_reg;
6268           /* Use dummy register if no base or index.  Unlike other opcodes,
6269              ginsns must be generated as this affects the stack pointer.  */
6270           dw2_regnum = (mem_reg
6271                         ? ginsn_dw2_regnum (mem_reg)
6272                         : GINSN_DW2_REGNUM_RSI_DUMMY);
6273         }
6274       else
6275         dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6276       ginsn = ginsn_new_load (insn_end_sym, false,
6277                               GINSN_SRC_INDIRECT, REG_SP, 0,
6278                               GINSN_DST_INDIRECT, dw2_regnum);
6279       ginsn_set_where (ginsn);
6280       /* Check if operation size is 16-bit.  */
6281       if (ginsn_opsize_prefix_p ())
6282         stack_opnd_size = 2;
6283       ginsn_next = ginsn_new_add (insn_end_sym, false,
6284                                   GINSN_SRC_REG, REG_SP, 0,
6285                                   GINSN_SRC_IMM, 0, stack_opnd_size,
6286                                   GINSN_DST_REG, REG_SP, 0);
6287       ginsn_set_where (ginsn_next);
6288       gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6289       break;
6290
6291     case 0x9c:
6292       if (i.tm.opcode_space != SPACE_BASE)
6293         break;
6294       /* pushf / pushfq.  */
6295       /* Check if operation size is 16-bit.  */
6296       if (ginsn_opsize_prefix_p ())
6297         stack_opnd_size = 2;
6298       ginsn = ginsn_new_sub (insn_end_sym, false,
6299                              GINSN_SRC_REG, REG_SP, 0,
6300                              GINSN_SRC_IMM, 0, stack_opnd_size,
6301                              GINSN_DST_REG, REG_SP, 0);
6302       ginsn_set_where (ginsn);
6303       /* FIXME - hardcode the actual DWARF reg number value.  As for SCFI
6304          correctness, this behaves simply as a placeholder value; it's
6305          just clearer if the value is correct.  */
6306       dw2_regnum = GINSN_DW2_REGNUM_EFLAGS;
6307       ginsn_next = ginsn_new_store (insn_end_sym, false,
6308                                     GINSN_SRC_REG, dw2_regnum,
6309                                     GINSN_DST_INDIRECT, REG_SP, 0);
6310       ginsn_set_where (ginsn_next);
6311       gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6312       break;
6313
6314     case 0x9d:
6315       if (i.tm.opcode_space != SPACE_BASE)
6316         break;
6317       /* popf / popfq.  */
6318       /* Check if operation size is 16-bit.  */
6319       if (ginsn_opsize_prefix_p ())
6320         stack_opnd_size = 2;
6321       /* FIXME - hardcode the actual DWARF reg number value.  As for SCFI
6322          correctness, this behaves simply as a placeholder value; it's
6323          just clearer if the value is correct.  */
6324       dw2_regnum = GINSN_DW2_REGNUM_EFLAGS;
6325       ginsn = ginsn_new_load (insn_end_sym, false,
6326                               GINSN_SRC_INDIRECT, REG_SP, 0,
6327                               GINSN_DST_REG, dw2_regnum);
6328       ginsn_set_where (ginsn);
6329       ginsn_next = ginsn_new_add (insn_end_sym, false,
6330                                   GINSN_SRC_REG, REG_SP, 0,
6331                                   GINSN_SRC_IMM, 0, stack_opnd_size,
6332                                   GINSN_DST_REG, REG_SP, 0);
6333       ginsn_set_where (ginsn_next);
6334       gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6335       break;
6336
6337     case 0xff:
6338       if (i.tm.opcode_space != SPACE_BASE)
6339         break;
6340       /* push from reg/mem.  */
6341       if (i.tm.extension_opcode == 6)
6342         {
6343           /* Check if operation size is 16-bit.  */
6344           if (ginsn_opsize_prefix_p ())
6345             stack_opnd_size = 2;
6346           ginsn = ginsn_new_sub (insn_end_sym, false,
6347                                  GINSN_SRC_REG, REG_SP, 0,
6348                                  GINSN_SRC_IMM, 0, stack_opnd_size,
6349                                  GINSN_DST_REG, REG_SP, 0);
6350           ginsn_set_where (ginsn);
6351           if (i.mem_operands)
6352             {
6353               mem_reg = (i.base_reg) ? i.base_reg : i.index_reg;
6354               /* Use dummy register if no base or index.  Unlike other opcodes,
6355                  ginsns must be generated as this affects the stack pointer.  */
6356               dw2_regnum = (mem_reg
6357                             ? ginsn_dw2_regnum (mem_reg)
6358                             : GINSN_DW2_REGNUM_RSI_DUMMY);
6359             }
6360           else
6361             dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6362           ginsn_next = ginsn_new_store (insn_end_sym, false,
6363                                         GINSN_SRC_INDIRECT, dw2_regnum,
6364                                         GINSN_DST_INDIRECT, REG_SP, 0);
6365           ginsn_set_where (ginsn_next);
6366           gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6367         }
6368       else if (i.tm.extension_opcode == 4)
6369         {
6370           /* jmp r/m.  E.g., notrack jmp *%rax.  */
6371           if (i.reg_operands)
6372             {
6373               dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6374               ginsn = ginsn_new_jump (insn_end_sym, true,
6375                                       GINSN_SRC_REG, dw2_regnum, NULL);
6376               ginsn_set_where (ginsn);
6377             }
6378           else if (i.mem_operands && i.index_reg)
6379             {
6380               /* jmp    *0x0(,%rax,8).  */
6381               dw2_regnum = ginsn_dw2_regnum (i.index_reg);
6382               ginsn = ginsn_new_jump (insn_end_sym, true,
6383                                       GINSN_SRC_REG, dw2_regnum, NULL);
6384               ginsn_set_where (ginsn);
6385             }
6386           else if (i.mem_operands && i.base_reg)
6387             {
6388               dw2_regnum = ginsn_dw2_regnum (i.base_reg);
6389               ginsn = ginsn_new_jump (insn_end_sym, true,
6390                                       GINSN_SRC_REG, dw2_regnum, NULL);
6391               ginsn_set_where (ginsn);
6392             }
6393         }
6394       else if (i.tm.extension_opcode == 2)
6395         {
6396           /* 0xFF /2 (call).  */
6397           if (i.reg_operands)
6398             {
6399               dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6400               ginsn = ginsn_new_call (insn_end_sym, true,
6401                                       GINSN_SRC_REG, dw2_regnum, NULL);
6402               ginsn_set_where (ginsn);
6403             }
6404           else if (i.mem_operands && i.base_reg)
6405             {
6406               dw2_regnum = ginsn_dw2_regnum (i.base_reg);
6407               ginsn = ginsn_new_call (insn_end_sym, true,
6408                                       GINSN_SRC_REG, dw2_regnum, NULL);
6409               ginsn_set_where (ginsn);
6410             }
6411         }
6412       break;
6413
6414     case 0xc2: /* ret imm16.  */
6415     case 0xc3: /* ret.  */
6416       if (i.tm.opcode_space != SPACE_BASE)
6417         break;
6418       /* Near ret.  */
6419       ginsn = ginsn_new_return (insn_end_sym, true);
6420       ginsn_set_where (ginsn);
6421       break;
6422
6423     case 0xc8:
6424       if (i.tm.opcode_space != SPACE_BASE)
6425         break;
6426       /* enter.  */
6427       ginsn = x86_ginsn_enter (insn_end_sym);
6428       break;
6429
6430     case 0xc9:
6431       if (i.tm.opcode_space != SPACE_BASE)
6432         break;
6433       /* leave.  */
6434       ginsn = x86_ginsn_leave (insn_end_sym);
6435       break;
6436
6437     case 0xe0 ... 0xe2: /* loop / loope / loopne.  */
6438     case 0xe3:          /* jecxz / jrcxz.  */
6439       if (i.tm.opcode_space != SPACE_BASE)
6440         break;
6441       ginsn = x86_ginsn_jump (insn_end_sym, true);
6442       ginsn_set_where (ginsn);
6443       break;
6444
6445     case 0xe8:
6446       if (i.tm.opcode_space != SPACE_BASE)
6447         break;
6448       /* PS: SCFI machinery does not care about which func is being
6449          called.  OK to skip that info.  */
6450       ginsn = ginsn_new_call (insn_end_sym, true,
6451                               GINSN_SRC_SYMBOL, 0, NULL);
6452       ginsn_set_where (ginsn);
6453       break;
6454
6455     /* PS: opcode 0xe9 appears only after relaxation.  Skip here.  */
6456     case 0xeb:
6457       /* If opcode_space != SPACE_BASE, this is not a jmp insn.  Skip it
6458          for GINSN_GEN_SCFI.  */
6459       if (i.tm.opcode_space != SPACE_BASE)
6460         break;
6461       /* Unconditional jmp.  */
6462       ginsn = x86_ginsn_jump (insn_end_sym, false);
6463       ginsn_set_where (ginsn);
6464       break;
6465
6466     default:
6467       /* TBD_GINSN_GEN_NOT_SCFI: Skip all other opcodes uninteresting for
6468          GINSN_GEN_SCFI mode.  */
6469       break;
6470     }
6471
6472   if (!ginsn && !x86_ginsn_safe_to_skip_p ())
6473     {
6474       /* For all unhandled insns that are not whitelisted, check that they do
6475          not impact SCFI correctness.  */
6476       err = x86_ginsn_unhandled ();
6477       switch (err)
6478         {
6479         case X86_GINSN_UNHANDLED_NONE:
6480           break;
6481         case X86_GINSN_UNHANDLED_DEST_REG:
6482           /* Not all writes to REG_FP are harmful in context of SCFI.  Simply
6483              generate a GINSN_TYPE_OTHER with destination set to the
6484              appropriate register.  The SCFI machinery will bail out if this
6485              ginsn affects SCFI correctness.  */
6486           dw2_regnum = ginsn_dw2_regnum (i.op[i.operands - 1].regs);
6487           ginsn = ginsn_new_other (insn_end_sym, true,
6488                                    GINSN_SRC_IMM, 0,
6489                                    GINSN_SRC_IMM, 0,
6490                                    GINSN_DST_REG, dw2_regnum);
6491           ginsn_set_where (ginsn);
6492           break;
6493         case X86_GINSN_UNHANDLED_CFG:
6494         case X86_GINSN_UNHANDLED_STACKOP:
6495           as_bad (_("SCFI: unhandled op %#x may cause incorrect CFI"), opcode);
6496           break;
6497         case X86_GINSN_UNHANDLED_UNEXPECTED:
6498           as_bad (_("SCFI: unexpected op %#x may cause incorrect CFI"),
6499                   opcode);
6500           break;
6501         default:
6502           abort ();
6503           break;
6504         }
6505     }
6506
6507   return ginsn;
6508 }
6509
6510 #endif
6511
6512 /* This is the guts of the machine-dependent assembler.  LINE points to a
6513    machine dependent instruction.  This function is supposed to emit
6514    the frags/bytes it assembles to.  */
6515
6516 void
6517 md_assemble (char *line)
6518 {
6519   unsigned int j;
6520   char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
6521   char *xstrdup_copy = NULL;
6522   const char *end, *pass1_mnem = NULL;
6523   enum i386_error pass1_err = 0;
6524   const insn_template *t;
6525   struct last_insn *last_insn
6526     = &seg_info(now_seg)->tc_segment_info_data.last_insn;
6527
6528   /* Initialize globals.  */
6529   current_templates.end = current_templates.start = NULL;
6530  retry:
6531   init_globals ();
6532
6533   /* Suppress optimization when the last thing we saw may not have been
6534      a proper instruction (e.g. a stand-alone prefix or .byte).  */
6535   if (last_insn->kind != last_insn_other)
6536     i.no_optimize = true;
6537
6538   /* First parse an instruction mnemonic & call i386_operand for the operands.
6539      We assume that the scrubber has arranged it so that line[0] is the valid
6540      start of a (possibly prefixed) mnemonic.  */
6541
6542   end = parse_insn (line, mnemonic, false);
6543   if (end == NULL)
6544     {
6545       if (pass1_mnem != NULL)
6546         goto match_error;
6547       if (i.error != no_error)
6548         {
6549           gas_assert (current_templates.start != NULL);
6550           if (may_need_pass2 (current_templates.start) && !i.suffix)
6551             goto no_match;
6552           /* No point in trying a 2nd pass - it'll only find the same suffix
6553              again.  */
6554           mnem_suffix = i.suffix;
6555           goto match_error;
6556         }
6557       return;
6558     }
6559   t = current_templates.start;
6560   /* NB: LINE may be changed to be the same as XSTRDUP_COPY.  */
6561   if (xstrdup_copy != line && may_need_pass2 (t))
6562     {
6563       /* Make a copy of the full line in case we need to retry.  */
6564       xstrdup_copy = xstrdup (line);
6565       copy = xstrdup_copy;
6566     }
6567   line += end - line;
6568   mnem_suffix = i.suffix;
6569
6570   line = parse_operands (line, mnemonic);
6571   this_operand = -1;
6572   if (line == NULL)
6573     {
6574       free (xstrdup_copy);
6575       return;
6576     }
6577
6578   /* Now we've parsed the mnemonic into a set of templates, and have the
6579      operands at hand.  */
6580
6581   /* All Intel opcodes have reversed operands except for "bound", "enter",
6582      "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
6583      "rmpadjust", "rmpupdate", and "rmpquery".  We also don't reverse
6584      intersegment "jmp" and "call" instructions with 2 immediate operands so
6585      that the immediate segment precedes the offset consistently in Intel and
6586      AT&T modes.  */
6587   if (intel_syntax
6588       && i.operands > 1
6589       && (t->mnem_off != MN_bound)
6590       && !startswith (mnemonic, "invlpg")
6591       && !startswith (mnemonic, "monitor")
6592       && !startswith (mnemonic, "mwait")
6593       && (t->mnem_off != MN_pvalidate)
6594       && !startswith (mnemonic, "rmp")
6595       && (t->mnem_off != MN_tpause)
6596       && (t->mnem_off != MN_umwait)
6597       && !(i.operands == 2
6598            && operand_type_check (i.types[0], imm)
6599            && operand_type_check (i.types[1], imm)))
6600     swap_operands ();
6601
6602   /* The order of the immediates should be reversed for 2-immediates EXTRQ
6603      and INSERTQ instructions.  Also UWRMSR wants its immediate to be in the
6604      "canonical" place (first), despite it appearing last (in AT&T syntax, or
6605      because of the swapping above) in the incoming set of operands.  */
6606   if ((i.imm_operands == 2
6607        && (t->mnem_off == MN_extrq || t->mnem_off == MN_insertq))
6608       || (t->mnem_off == MN_uwrmsr && i.imm_operands
6609           && i.operands > i.imm_operands))
6610       swap_2_operands (0, 1);
6611
6612   if (i.imm_operands)
6613     {
6614       /* For USER_MSR instructions, imm32 stands for the name of a model specific
6615          register (MSR). That's an unsigned quantity, whereas all other insns with
6616          32-bit immediate and 64-bit operand size use sign-extended
6617          immediates (imm32s). Therefore these insns are special-cased, bypassing
6618          the normal handling of immediates here.  */
6619       if (is_cpu(current_templates.start, CpuUSER_MSR))
6620         {
6621           for (j = 0; j < i.operands; j++)
6622             {
6623               if (operand_type_check(i.types[j], imm))
6624                 i.types[j] = smallest_imm_type (i.op[j].imms->X_add_number);
6625             }
6626         }
6627       else
6628         optimize_imm ();
6629     }
6630
6631   if (i.disp_operands && !optimize_disp (t))
6632     return;
6633
6634   /* Next, we find a template that matches the given insn,
6635      making sure the overlap of the given operands types is consistent
6636      with the template operand types.  */
6637
6638   if (!(t = match_template (mnem_suffix)))
6639     {
6640       const char *err_msg;
6641
6642       if (copy && !mnem_suffix)
6643         {
6644           line = copy;
6645           copy = NULL;
6646   no_match:
6647           pass1_err = i.error;
6648           pass1_mnem = insn_name (current_templates.start);
6649           goto retry;
6650         }
6651
6652       /* If a non-/only-64bit template (group) was found in pass 1, and if
6653          _some_ template (group) was found in pass 2, squash pass 1's
6654          error.  */
6655       if (pass1_err == unsupported_64bit)
6656         pass1_mnem = NULL;
6657
6658   match_error:
6659       free (xstrdup_copy);
6660
6661       switch (pass1_mnem ? pass1_err : i.error)
6662         {
6663         default:
6664           abort ();
6665         case operand_size_mismatch:
6666           err_msg = _("operand size mismatch");
6667           break;
6668         case operand_type_mismatch:
6669           err_msg = _("operand type mismatch");
6670           break;
6671         case register_type_mismatch:
6672           err_msg = _("register type mismatch");
6673           break;
6674         case number_of_operands_mismatch:
6675           err_msg = _("number of operands mismatch");
6676           break;
6677         case invalid_instruction_suffix:
6678           err_msg = _("invalid instruction suffix");
6679           break;
6680         case bad_imm4:
6681           err_msg = _("constant doesn't fit in 4 bits");
6682           break;
6683         case unsupported_with_intel_mnemonic:
6684           err_msg = _("unsupported with Intel mnemonic");
6685           break;
6686         case unsupported_syntax:
6687           err_msg = _("unsupported syntax");
6688           break;
6689         case unsupported_EGPR_for_addressing:
6690           err_msg = _("extended GPR cannot be used as base/index");
6691           break;
6692         case unsupported_nf:
6693           err_msg = _("{nf} unsupported");
6694           break;
6695         case unsupported:
6696           as_bad (_("unsupported instruction `%s'"),
6697                   pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6698           return;
6699         case unsupported_on_arch:
6700           as_bad (_("`%s' is not supported on `%s%s'"),
6701                   pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
6702                   cpu_arch_name ? cpu_arch_name : default_arch,
6703                   cpu_sub_arch_name ? cpu_sub_arch_name : "");
6704           return;
6705         case unsupported_64bit:
6706           if (ISLOWER (mnem_suffix))
6707             {
6708               if (flag_code == CODE_64BIT)
6709                 as_bad (_("`%s%c' is not supported in 64-bit mode"),
6710                         pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
6711                         mnem_suffix);
6712               else
6713                 as_bad (_("`%s%c' is only supported in 64-bit mode"),
6714                         pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
6715                         mnem_suffix);
6716             }
6717           else
6718             {
6719               if (flag_code == CODE_64BIT)
6720                 as_bad (_("`%s' is not supported in 64-bit mode"),
6721                         pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6722               else
6723                 as_bad (_("`%s' is only supported in 64-bit mode"),
6724                         pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6725             }
6726           return;
6727         case no_vex_encoding:
6728           err_msg = _("no VEX/XOP encoding");
6729           break;
6730         case no_evex_encoding:
6731           err_msg = _("no EVEX encoding");
6732           break;
6733         case invalid_sib_address:
6734           err_msg = _("invalid SIB address");
6735           break;
6736         case invalid_vsib_address:
6737           err_msg = _("invalid VSIB address");
6738           break;
6739         case invalid_vector_register_set:
6740           err_msg = _("mask, index, and destination registers must be distinct");
6741           break;
6742         case invalid_tmm_register_set:
6743           err_msg = _("all tmm registers must be distinct");
6744           break;
6745         case invalid_dest_and_src_register_set:
6746           err_msg = _("destination and source registers must be distinct");
6747           break;
6748         case invalid_dest_register_set:
6749           err_msg = _("two dest registers must be distinct");
6750           break;
6751         case invalid_pseudo_prefix:
6752           err_msg = _("rex2 pseudo prefix cannot be used");
6753           break;
6754         case unsupported_vector_index_register:
6755           err_msg = _("unsupported vector index register");
6756           break;
6757         case unsupported_broadcast:
6758           err_msg = _("unsupported broadcast");
6759           break;
6760         case broadcast_needed:
6761           err_msg = _("broadcast is needed for operand of such type");
6762           break;
6763         case unsupported_masking:
6764           err_msg = _("unsupported masking");
6765           break;
6766         case mask_not_on_destination:
6767           err_msg = _("mask not on destination operand");
6768           break;
6769         case no_default_mask:
6770           err_msg = _("default mask isn't allowed");
6771           break;
6772         case unsupported_rc_sae:
6773           err_msg = _("unsupported static rounding/sae");
6774           break;
6775         case unsupported_vector_size:
6776           as_bad (_("vector size above %u required for `%s'"), 128u << vector_size,
6777                   pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6778           return;
6779         case unsupported_rsp_register:
6780           err_msg = _("'rsp' register cannot be used");
6781           break;
6782         case internal_error:
6783           err_msg = _("internal error");
6784           break;
6785         }
6786       as_bad (_("%s for `%s'"), err_msg,
6787               pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6788       return;
6789     }
6790
6791   free (xstrdup_copy);
6792
6793   if (sse_check != check_none
6794       /* The opcode space check isn't strictly needed; it's there only to
6795          bypass the logic below when easily possible.  */
6796       && t->opcode_space >= SPACE_0F
6797       && t->opcode_space <= SPACE_0F3A
6798       && !is_cpu (&i.tm, CpuSSE4a)
6799       && !is_any_vex_encoding (t))
6800     {
6801       /* Some KL and all WideKL insns have only implicit %xmm operands.  */
6802       bool simd = is_cpu (t, CpuKL) || is_cpu (t, CpuWideKL);
6803
6804       for (j = 0; j < t->operands; ++j)
6805         {
6806           if (t->operand_types[j].bitfield.class == RegMMX)
6807             break;
6808           if (t->operand_types[j].bitfield.class == RegSIMD)
6809             simd = true;
6810         }
6811
6812       if (j >= t->operands && simd)
6813         (sse_check == check_warning
6814          ? as_warn
6815          : as_bad) (_("SSE instruction `%s' is used"), insn_name (&i.tm));
6816     }
6817
6818   if (i.tm.opcode_modifier.fwait)
6819     if (!add_prefix (FWAIT_OPCODE))
6820       return;
6821
6822   /* Check if REP prefix is OK.  */
6823   if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
6824     {
6825       as_bad (_("invalid instruction `%s' after `%s'"),
6826                 insn_name (&i.tm), i.rep_prefix);
6827       return;
6828     }
6829
6830   /* Check for lock without a lockable instruction.  Destination operand
6831      must be memory unless it is xchg (0x86).  */
6832   if (i.prefix[LOCK_PREFIX])
6833     {
6834       if (i.tm.opcode_modifier.prefixok < PrefixLock
6835           || i.mem_operands == 0
6836           || (i.tm.base_opcode != 0x86
6837               && !(i.flags[i.operands - 1] & Operand_Mem)))
6838         {
6839           as_bad (_("expecting lockable instruction after `lock'"));
6840           return;
6841         }
6842
6843       /* Zap the redundant prefix from XCHG when optimizing.  */
6844       if (i.tm.base_opcode == 0x86 && optimize && !i.no_optimize)
6845         i.prefix[LOCK_PREFIX] = 0;
6846     }
6847
6848   if (is_any_vex_encoding (&i.tm)
6849       || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
6850       || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
6851     {
6852       /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns.  */
6853       if (i.prefix[DATA_PREFIX])
6854         {
6855           as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
6856           return;
6857         }
6858
6859       /* Don't allow e.g. KMOV in TLS code sequences.  */
6860       for (j = i.imm_operands; j < i.operands; ++j)
6861         switch (i.reloc[j])
6862           {
6863           case BFD_RELOC_X86_64_GOTTPOFF:
6864             if (i.tm.mnem_off == MN_add
6865                 && i.tm.opcode_space == SPACE_EVEXMAP4
6866                 && i.mem_operands == 1
6867                 && i.base_reg
6868                 && i.base_reg->reg_num == RegIP
6869                 && i.tm.operand_types[0].bitfield.class == Reg
6870                 && i.tm.operand_types[2].bitfield.class == Reg)
6871               /* Allow APX: add %reg1, foo@gottpoff(%rip), %reg2.  */
6872               break;
6873             /* Fall through.  */
6874           case BFD_RELOC_386_TLS_GOTIE:
6875           case BFD_RELOC_386_TLS_LE_32:
6876           case BFD_RELOC_X86_64_TLSLD:
6877             as_bad (_("TLS relocation cannot be used with `%s'"), insn_name (&i.tm));
6878             return;
6879           default:
6880             break;
6881           }
6882     }
6883
6884   /* Check if HLE prefix is OK.  */
6885   if (i.hle_prefix && !check_hle ())
6886     return;
6887
6888   /* Check BND prefix.  */
6889   if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
6890     as_bad (_("expecting valid branch instruction after `bnd'"));
6891
6892   /* Check NOTRACK prefix.  */
6893   if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
6894     as_bad (_("expecting indirect branch instruction after `notrack'"));
6895
6896   if (is_cpu (&i.tm, CpuMPX))
6897     {
6898       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
6899         as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
6900       else if (flag_code != CODE_16BIT
6901                ? i.prefix[ADDR_PREFIX]
6902                : i.mem_operands && !i.prefix[ADDR_PREFIX])
6903         as_bad (_("16-bit address isn't allowed in MPX instructions"));
6904     }
6905
6906   /* Insert BND prefix.  */
6907   if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
6908     {
6909       if (!i.prefix[BND_PREFIX])
6910         add_prefix (BND_PREFIX_OPCODE);
6911       else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
6912         {
6913           as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
6914           i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
6915         }
6916     }
6917
6918   /* Check string instruction segment overrides.  */
6919   if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
6920     {
6921       gas_assert (i.mem_operands);
6922       if (!check_string ())
6923         return;
6924       i.disp_operands = 0;
6925     }
6926
6927   /* The memory operand of (%dx) should be only used with input/output
6928      instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee).  */
6929   if (i.input_output_operand
6930       && ((i.tm.base_opcode | 0x82) != 0xee
6931           || i.tm.opcode_space != SPACE_BASE))
6932     {
6933       as_bad (_("input/output port address isn't allowed with `%s'"),
6934               insn_name (&i.tm));
6935       return;
6936     }
6937
6938   if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
6939     optimize_encoding ();
6940
6941   /* Past optimization there's no need to distinguish encoding_evex,
6942      encoding_evex512, and encoding_egpr anymore.  */
6943   if (i.encoding == encoding_evex512)
6944     i.encoding = encoding_evex;
6945   else if (i.encoding == encoding_egpr)
6946     i.encoding = is_any_vex_encoding (&i.tm) ? encoding_evex
6947                                              : encoding_default;
6948
6949   if (use_unaligned_vector_move)
6950     encode_with_unaligned_vector_move ();
6951
6952   if (!process_suffix ())
6953     return;
6954
6955   /* Check if IP-relative addressing requirements can be satisfied.  */
6956   if (is_cpu (&i.tm, CpuPREFETCHI)
6957       && !(i.base_reg && i.base_reg->reg_num == RegIP))
6958     as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
6959
6960   /* Update operand types and check extended states.  */
6961   for (j = 0; j < i.operands; j++)
6962     {
6963       i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
6964       switch (i.tm.operand_types[j].bitfield.class)
6965         {
6966         default:
6967           break;
6968         case RegMMX:
6969           i.xstate |= xstate_mmx;
6970           break;
6971         case RegMask:
6972           i.xstate |= xstate_mask;
6973           break;
6974         case RegSIMD:
6975           if (i.tm.operand_types[j].bitfield.tmmword)
6976             i.xstate |= xstate_tmm;
6977           else if (i.tm.operand_types[j].bitfield.zmmword
6978                    && !i.tm.opcode_modifier.vex
6979                    && vector_size >= VSZ512)
6980             i.xstate |= xstate_zmm;
6981           else if (i.tm.operand_types[j].bitfield.ymmword
6982                    && vector_size >= VSZ256)
6983             i.xstate |= xstate_ymm;
6984           else if (i.tm.operand_types[j].bitfield.xmmword)
6985             i.xstate |= xstate_xmm;
6986           break;
6987         }
6988     }
6989
6990   /* Make still unresolved immediate matches conform to size of immediate
6991      given in i.suffix.  */
6992   if (!finalize_imm ())
6993     return;
6994
6995   if (i.types[0].bitfield.imm1)
6996     i.imm_operands = 0; /* kludge for shift insns.  */
6997
6998   /* For insns with operands there are more diddles to do to the opcode.  */
6999   if (i.operands)
7000     {
7001       if (!process_operands ())
7002         return;
7003     }
7004   else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
7005     {
7006       /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc.  */
7007       as_warn (_("translating to `%sp'"), insn_name (&i.tm));
7008     }
7009
7010   if (is_any_vex_encoding (&i.tm))
7011     {
7012       if (!cpu_arch_flags.bitfield.cpui286)
7013         {
7014           as_bad (_("instruction `%s' isn't supported outside of protected mode."),
7015                   insn_name (&i.tm));
7016           return;
7017         }
7018
7019       /* Check for explicit REX prefix.  */
7020       if (i.prefix[REX_PREFIX] || i.rex_encoding)
7021         {
7022           as_bad (_("REX prefix invalid with `%s'"), insn_name (&i.tm));
7023           return;
7024         }
7025
7026       /* Check for explicit REX2 prefix.  */
7027       if (i.rex2_encoding)
7028         {
7029           as_bad (_("{rex2} prefix invalid with `%s'"), insn_name (&i.tm));
7030           return;
7031         }
7032
7033       if (is_apx_evex_encoding ())
7034         build_apx_evex_prefix ();
7035       else if (i.tm.opcode_modifier.vex)
7036         build_vex_prefix (t);
7037       else
7038         build_evex_prefix ();
7039
7040       /* The individual REX.RXBW bits got consumed.  */
7041       i.rex &= REX_OPCODE;
7042
7043       /* The rex2 bits got consumed.  */
7044       i.rex2 = 0;
7045     }
7046
7047   /* Handle conversion of 'int $3' --> special int3 insn.  */
7048   if (i.tm.mnem_off == MN_int
7049       && i.op[0].imms->X_add_number == 3)
7050     {
7051       i.tm.base_opcode = INT3_OPCODE;
7052       i.imm_operands = 0;
7053     }
7054
7055   if ((i.tm.opcode_modifier.jump == JUMP
7056        || i.tm.opcode_modifier.jump == JUMP_BYTE
7057        || i.tm.opcode_modifier.jump == JUMP_DWORD)
7058       && i.op[0].disps->X_op == O_constant)
7059     {
7060       /* Convert "jmp constant" (and "call constant") to a jump (call) to
7061          the absolute address given by the constant.  Since ix86 jumps and
7062          calls are pc relative, we need to generate a reloc.  */
7063       i.op[0].disps->X_add_symbol = &abs_symbol;
7064       i.op[0].disps->X_op = O_symbol;
7065     }
7066
7067   establish_rex ();
7068
7069   insert_lfence_before (last_insn);
7070
7071   /* We are ready to output the insn.  */
7072   output_insn (last_insn);
7073
7074 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7075   /* PS: SCFI is enabled only for System V AMD64 ABI.  The ABI check has been
7076      performed in i386_target_format.  */
7077   if (IS_ELF && flag_synth_cfi)
7078     {
7079       ginsnS *ginsn;
7080       ginsn = x86_ginsn_new (symbol_temp_new_now (), frch_ginsn_gen_mode ());
7081       frch_ginsn_data_append (ginsn);
7082     }
7083 #endif
7084
7085   insert_lfence_after ();
7086
7087   if (i.tm.opcode_modifier.isprefix)
7088     {
7089       last_insn->kind = last_insn_prefix;
7090       last_insn->name = insn_name (&i.tm);
7091       last_insn->file = as_where (&last_insn->line);
7092     }
7093   else
7094     last_insn->kind = last_insn_other;
7095 }
7096
7097 /* Tell whether template T may be used with a `q' mnemonic suffix.  In
7098    64-bit mode that's always the case; elsewhere only cmpxchg8b, fild,
7099    fistp, and fisttp qualify.  Of the two templates fild and fisttp each
7100    have just one passes the test below, which is good enough as the other
7101    attributes that matter don't differ between the two.  */
7102 static INLINE bool q_suffix_allowed(const insn_template *t)
7103 {
7104   if (flag_code == CODE_64BIT || t->mnem_off == MN_cmpxchg8b)
7105     return true;
7106   /* What remains are fild / fistp / fisttp.  */
7107   return t->opcode_space == SPACE_BASE && t->base_opcode == 0xdf
7108          && (t->extension_opcode & 1) != 0;
7109 }
7110
/* Parse the prefixes and the mnemonic at the start of LINE.  Each token is
   copied into MNEMONIC (translated through mnemonic_chars[], limited to
   MAX_MNEM_SIZE characters) and looked up with op_lookup (); the code then
   reads the result of the lookup from current_templates.

   Tokens found to be prefixes that are followed by more input are recorded
   in `i' (pseudo-prefixes like {vex} in the respective encoding fields, all
   others via add_prefix ()), and scanning restarts at the next token.  With
   PREFIX_ONLY set the function returns the start of the first token not
   consumed as a prefix, without checking whether that token is a known
   instruction.

   Otherwise, when the mnemonic isn't found as is, the deprecated `.s',
   `.d8', and `.d32' suffixes and then a trailing operand size suffix
   (recorded in i.suffix) are stripped and the lookup is repeated.  For
   JUMP / JUMP_BYTE templates a `,pt' / `,pn' branch hint is accepted.
   Finally the templates found are checked with cpu_flags_match () and, for
   a `q' suffix, q_suffix_allowed ().

   Returns a pointer past the parsed mnemonic (and branch hint) on success.
   On failure NULL is returned, in most cases after an as_bad () diagnostic;
   the final support check instead records the reason in i.error, and both
   that and the "no such instruction" diagnostic are limited to pass 1.  */
7111 static const char *
7112 parse_insn (const char *line, char *mnemonic, bool prefix_only)
7113 {
7114   const char *l = line, *token_start = l;
7115   char *mnem_p;
       /* Pass 1 is recognized by no template set being current on entry.  */
7116   bool pass1 = !current_templates.start;
7117   int supported;
7118   const insn_template *t;
7119   char *dot_p = NULL;
7120
7121   while (1)
7122     {
7123       mnem_p = mnemonic;
7124       /* Pseudo-prefixes start with an opening figure brace.  */
7125       if ((*mnem_p = *l) == '{')
7126         {
7127           ++mnem_p;
7128           ++l;
7129         }
7130       while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
7131         {
7132           if (*mnem_p == '.')
7133             dot_p = mnem_p;
7134           mnem_p++;
7135           if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
7136             {
                 /* Also the target of a goto from the closing brace handling
                    below.  */
7137             too_long:
7138               as_bad (_("no such instruction: `%s'"), token_start);
7139               return NULL;
7140             }
7141           l++;
7142         }
7143       /* Pseudo-prefixes end with a closing figure brace.  */
7144       if (*mnemonic == '{' && *l == '}')
7145         {
7146           *mnem_p++ = *l++;
7147           if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
7148             goto too_long;
7149           *mnem_p = '\0';
7150
7151           /* Point l at the closing brace if there's no other separator.  */
7152           if (*l != END_OF_INSN && !is_space_char (*l)
7153               && *l != PREFIX_SEPARATOR)
7154             --l;
7155         }
7156       else if (!is_space_char (*l)
7157                && *l != END_OF_INSN
7158                && (intel_syntax
7159                    || (*l != PREFIX_SEPARATOR && *l != ',')))
7160         {
7161           if (prefix_only)
7162             break;
7163           as_bad (_("invalid character %s in mnemonic"),
7164                   output_invalid (*l));
7165           return NULL;
7166         }
7167       if (token_start == l)
7168         {
7169           if (!intel_syntax && *l == PREFIX_SEPARATOR)
7170             as_bad (_("expecting prefix; got nothing"));
7171           else
7172             as_bad (_("expecting mnemonic; got nothing"));
7173           return NULL;
7174         }
7175
7176       /* Look up instruction (or prefix) via hash table.  */
7177       op_lookup (mnemonic);
7178
           /* A prefix is only handled as such when more input follows it on
              the line; a token ending the insn is left to the code after the
              loop, just like any non-prefix mnemonic.  */
7179       if (*l != END_OF_INSN
7180           && (!is_space_char (*l) || l[1] != END_OF_INSN)
7181           && current_templates.start
7182           && current_templates.start->opcode_modifier.isprefix)
7183         {
7184           supported = cpu_flags_match (current_templates.start);
7185           if (!(supported & CPU_FLAGS_64BIT_MATCH))
7186             {
7187               as_bad ((flag_code != CODE_64BIT
7188                        ? _("`%s' is only supported in 64-bit mode")
7189                        : _("`%s' is not supported in 64-bit mode")),
7190                       insn_name (current_templates.start));
7191               return NULL;
7192             }
7193           if (supported != CPU_FLAGS_PERFECT_MATCH)
7194             {
7195               as_bad (_("`%s' is not supported on `%s%s'"),
7196                       insn_name (current_templates.start),
7197                       cpu_arch_name ? cpu_arch_name : default_arch,
7198                       cpu_sub_arch_name ? cpu_sub_arch_name : "");
7199               return NULL;
7200             }
7201           /* If we are in 16-bit mode, do not allow addr16 or data16.
7202              Similarly, in 32-bit mode, do not allow addr32 or data32.  */
7203           if ((current_templates.start->opcode_modifier.size == SIZE16
7204                || current_templates.start->opcode_modifier.size == SIZE32)
7205               && flag_code != CODE_64BIT
7206               && ((current_templates.start->opcode_modifier.size == SIZE32)
7207                   ^ (flag_code == CODE_16BIT)))
7208             {
7209               as_bad (_("redundant %s prefix"),
7210                       insn_name (current_templates.start));
7211               return NULL;
7212             }
7213
7214           if (current_templates.start->base_opcode == PSEUDO_PREFIX)
7215             {
7216               /* Handle pseudo prefixes.  */
7217               switch (current_templates.start->extension_opcode)
7218                 {
7219                 case Prefix_Disp8:
7220                   /* {disp8} */
7221                   i.disp_encoding = disp_encoding_8bit;
7222                   break;
7223                 case Prefix_Disp16:
7224                   /* {disp16} */
7225                   i.disp_encoding = disp_encoding_16bit;
7226                   break;
7227                 case Prefix_Disp32:
7228                   /* {disp32} */
7229                   i.disp_encoding = disp_encoding_32bit;
7230                   break;
7231                 case Prefix_Load:
7232                   /* {load} */
7233                   i.dir_encoding = dir_encoding_load;
7234                   break;
7235                 case Prefix_Store:
7236                   /* {store} */
7237                   i.dir_encoding = dir_encoding_store;
7238                   break;
7239                 case Prefix_VEX:
7240                   /* {vex} */
7241                   i.encoding = encoding_vex;
7242                   break;
7243                 case Prefix_VEX3:
7244                   /* {vex3} */
7245                   i.encoding = encoding_vex3;
7246                   break;
7247                 case Prefix_EVEX:
7248                   /* {evex} */
7249                   i.encoding = encoding_evex;
7250                   break;
7251                 case Prefix_REX:
7252                   /* {rex} */
7253                   i.rex_encoding = true;
7254                   break;
7255                 case Prefix_REX2:
7256                   /* {rex2} */
7257                   i.rex2_encoding = true;
7258                   break;
7259                 case Prefix_NF:
7260                   /* {nf} */
7261                   i.has_nf = true;
                       /* Without an earlier encoding request {nf} selects
                          EVEX; any other encoding in effect is diagnosed
                          after the switch.  */
7262                   if (i.encoding == encoding_default)
7263                     i.encoding = encoding_evex;
7264                   break;
7265                 case Prefix_NoOptimize:
7266                   /* {nooptimize} */
7267                   i.no_optimize = true;
7268                   break;
7269                 default:
7270                   abort ();
7271                 }
7272               if (i.has_nf && i.encoding != encoding_evex)
7273                 {
7274                   as_bad (_("{nf} cannot be combined with {vex}/{vex3}"));
7275                   return NULL;
7276                 }
7277             }
7278           else
7279             {
7280               /* Add prefix, checking for repeated prefixes.  */
7281               switch (add_prefix (current_templates.start->base_opcode))
7282                 {
7283                 case PREFIX_EXIST:
7284                   return NULL;
7285                 case PREFIX_DS:
7286                   if (is_cpu (current_templates.start, CpuIBT))
7287                     i.notrack_prefix = insn_name (current_templates.start);
7288                   break;
7289                 case PREFIX_REP:
7290                   if (is_cpu (current_templates.start, CpuHLE))
7291                     i.hle_prefix = insn_name (current_templates.start);
7292                   else if (is_cpu (current_templates.start, CpuMPX))
7293                     i.bnd_prefix = insn_name (current_templates.start);
7294                   else
7295                     i.rep_prefix = insn_name (current_templates.start);
7296                   break;
7297                 default:
7298                   break;
7299                 }
7300             }
7301           /* Skip past PREFIX_SEPARATOR and reset token_start.  */
7302           token_start = ++l;
7303         }
7304       else
7305         break;
7306     }
7307
7308   if (prefix_only)
7309     return token_start;
7310
7311   if (!current_templates.start)
7312     {
7313       /* Deprecated functionality (new code should use pseudo-prefixes instead):
7314          Check if we should swap operand or force 32bit displacement in
7315          encoding.  */
7316       if (mnem_p - 2 == dot_p && dot_p[1] == 's')
7317         {
7318           if (i.dir_encoding == dir_encoding_default)
7319             i.dir_encoding = dir_encoding_swap;
7320           else
7321             as_warn (_("ignoring `.s' suffix due to earlier `{%s}'"),
7322                      i.dir_encoding == dir_encoding_load ? "load" : "store");
7323         }
7324       else if (mnem_p - 3 == dot_p
7325                && dot_p[1] == 'd'
7326                && dot_p[2] == '8')
7327         {
7328           if (i.disp_encoding == disp_encoding_default)
7329             i.disp_encoding = disp_encoding_8bit;
7330           else if (i.disp_encoding != disp_encoding_8bit)
7331             as_warn (_("ignoring `.d8' suffix due to earlier `{disp<N>}'"));
7332         }
7333       else if (mnem_p - 4 == dot_p
7334                && dot_p[1] == 'd'
7335                && dot_p[2] == '3'
7336                && dot_p[3] == '2')
7337         {
7338           if (i.disp_encoding == disp_encoding_default)
7339             i.disp_encoding = disp_encoding_32bit;
7340           else if (i.disp_encoding != disp_encoding_32bit)
7341             as_warn (_("ignoring `.d32' suffix due to earlier `{disp<N>}'"));
7342         }
7343       else
7344         goto check_suffix;
           /* Cut off the suffix just recognized and retry the lookup.  */
7345       mnem_p = dot_p;
7346       *dot_p = '\0';
7347       op_lookup (mnemonic);
7348     }
7349
       /* When this isn't pass 1 a successful lookup of the full mnemonic is
          discarded, so that only a match found by trimming a suffix below
          is considered.  */
7350   if (!current_templates.start || !pass1)
7351     {
7352       current_templates.start = NULL;
7353
         /* Also entered by the goto above, for an unknown mnemonic not ending
            in `.s', `.d8', or `.d32'.  */
7354     check_suffix:
7355       if (mnem_p > mnemonic)
7356         {
7357           /* See if we can get a match by trimming off a suffix.  */
7358           switch (mnem_p[-1])
7359             {
                 /* Note the unusual layout: the BYTE / QWORD case labels sit
                    inside the `else' arm, so all three cases share the
                    assignment of the suffix character (WORD only when the
                    `if' doesn't apply) as well as the two statements that
                    follow.  */
7360             case WORD_MNEM_SUFFIX:
7361               if (intel_syntax && (intel_float_operand (mnemonic) & 2))
7362                 i.suffix = SHORT_MNEM_SUFFIX;
7363               else
7364                 /* Fall through.  */
7365               case BYTE_MNEM_SUFFIX:
7366               case QWORD_MNEM_SUFFIX:
7367                 i.suffix = mnem_p[-1];
7368               mnem_p[-1] = '\0';
7369               op_lookup (mnemonic);
7370               break;
7371             case SHORT_MNEM_SUFFIX:
7372             case LONG_MNEM_SUFFIX:
7373               if (!intel_syntax)
7374                 {
7375                   i.suffix = mnem_p[-1];
7376                   mnem_p[-1] = '\0';
7377                   op_lookup (mnemonic);
7378                 }
7379               break;
7380
7381               /* Intel Syntax.  */
7382             case 'd':
7383               if (intel_syntax)
7384                 {
7385                   if (intel_float_operand (mnemonic) == 1)
7386                     i.suffix = SHORT_MNEM_SUFFIX;
7387                   else
7388                     i.suffix = LONG_MNEM_SUFFIX;
7389                   mnem_p[-1] = '\0';
7390                   op_lookup (mnemonic);
7391                 }
7392               /* For compatibility reasons accept MOVSD and CMPSD without
7393                  operands even in AT&T mode.  */
7394               else if (*l == END_OF_INSN
7395                        || (is_space_char (*l) && l[1] == END_OF_INSN))
7396                 {
7397                   mnem_p[-1] = '\0';
7398                   op_lookup (mnemonic);
7399                   if (current_templates.start != NULL
7400                       /* MOVS or CMPS */
7401                       && (current_templates.start->base_opcode | 2) == 0xa6
7402                       && current_templates.start->opcode_space
7403                          == SPACE_BASE
7404                       && mnem_p[-2] == 's')
7405                     {
7406                       as_warn (_("found `%sd'; assuming `%sl' was meant"),
7407                                mnemonic, mnemonic);
7408                       i.suffix = LONG_MNEM_SUFFIX;
7409                     }
7410                   else
7411                     {
                           /* Not MOVS / CMPS: drop the match and put the
                              trimmed `d' back.  */
7412                       current_templates.start = NULL;
7413                       mnem_p[-1] = 'd';
7414                     }
7415                 }
7416               break;
7417             }
7418         }
7419
7420       if (!current_templates.start)
7421         {
7422           if (pass1)
7423             as_bad (_("no such instruction: `%s'"), token_start);
7424           return NULL;
7425         }
7426     }
7427
7428   if (current_templates.start->opcode_modifier.jump == JUMP
7429       || current_templates.start->opcode_modifier.jump == JUMP_BYTE)
7430     {
7431       /* Check for a branch hint.  We allow ",pt" and ",pn" for
7432          predict taken and predict not taken respectively.
7433          I'm not sure that branch hints actually do anything on loop
7434          and jcxz insns (JumpByte) for current Pentium4 chips.  They
7435          may work in the future and it doesn't hurt to accept them
7436          now.  */
7437       if (l[0] == ',' && l[1] == 'p')
7438         {
7439           if (l[2] == 't')
7440             {
7441               if (!add_prefix (DS_PREFIX_OPCODE))
7442                 return NULL;
7443               l += 3;
7444             }
7445           else if (l[2] == 'n')
7446             {
7447               if (!add_prefix (CS_PREFIX_OPCODE))
7448                 return NULL;
7449               l += 3;
7450             }
7451         }
7452     }
7453   /* Any other comma loses.  */
7454   if (*l == ',')
7455     {
7456       as_bad (_("invalid character %s in mnemonic"),
7457               output_invalid (*l));
7458       return NULL;
7459     }
7460
7461   /* Check if instruction is supported on specified architecture.  */
       /* The match flags accumulate over the templates; the first point at
          which they add up to a perfect match ends the search.  */
7462   supported = 0;
7463   for (t = current_templates.start; t < current_templates.end; ++t)
7464     {
7465       supported |= cpu_flags_match (t);
7466
7467       if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
7468         supported &= ~CPU_FLAGS_64BIT_MATCH;
7469
7470       if (supported == CPU_FLAGS_PERFECT_MATCH)
7471         return l;
7472     }
7473
       /* No diagnostic is issued here; on pass 1 just the kind of mismatch
          is recorded.  */
7474   if (pass1)
7475     {
7476       if (supported & CPU_FLAGS_64BIT_MATCH)
7477         i.error = unsupported_on_arch;
7478       else
7479         i.error = unsupported_64bit;
7480     }
7481
7482   return NULL;
7483 }
7484
/* Parse the comma separated operands starting at L, recording each one in
   `i' via i386_intel_operand () or i386_att_operand ().  Commas nested in
   parentheses (AT&T syntax only) or in double quotes don't separate
   operands.  MNEMONIC is used in diagnostics and, for Intel syntax, passed
   to intel_float_operand ().

   Returns a pointer to the END_OF_INSN terminator on success, or NULL on
   error (diagnosed here, or presumably by the operand parser when that is
   what failed).  */
7485 static char *
7486 parse_operands (char *l, const char *mnemonic)
7487 {
7488   char *token_start;
7489
7490   /* 1 if operand is pending after ','.  */
7491   unsigned int expecting_operand = 0;
7492
7493   while (*l != END_OF_INSN)
7494     {
7495       /* Non-zero if operand parens not balanced.  */
7496       unsigned int paren_not_balanced = 0;
7497       /* True if inside double quotes.  */
7498       bool in_quotes = false;
7499
7500       /* Skip optional white space before operand.  */
7501       if (is_space_char (*l))
7502         ++l;
7503       if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
7504         {
7505           as_bad (_("invalid character %s before operand %d"),
7506                   output_invalid (*l),
7507                   i.operands + 1);
7508           return NULL;
7509         }
7510       token_start = l;  /* After white space.  */
           /* Advance to the comma (or END_OF_INSN) ending this operand.  */
7511       while (in_quotes || paren_not_balanced || *l != ',')
7512         {
7513           if (*l == END_OF_INSN)
7514             {
7515               if (in_quotes)
7516                 {
7517                   as_bad (_("unbalanced double quotes in operand %d."),
7518                           i.operands + 1);
7519                   return NULL;
7520                 }
7521               if (paren_not_balanced)
7522                 {
7523                   know (!intel_syntax);
7524                   as_bad (_("unbalanced parenthesis in operand %d."),
7525                           i.operands + 1);
7526                   return NULL;
7527                 }
7528               else
7529                 break;  /* we are done */
7530             }
               /* Step over the backslash of an escaped double quote; the
                  quote itself is consumed by the increment further down.  */
7531           else if (*l == '\\' && l[1] == '"')
7532             ++l;
7533           else if (*l == '"')
7534             in_quotes = !in_quotes;
7535           else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
7536             {
7537               as_bad (_("invalid character %s in operand %d"),
7538                       output_invalid (*l),
7539                       i.operands + 1);
7540               return NULL;
7541             }
7542           if (!intel_syntax && !in_quotes)
7543             {
                   /* The counter is unsigned: a `)' without a preceding `('
                      wraps it to a non-zero value rather than making it
                      negative.  */
7544               if (*l == '(')
7545                 ++paren_not_balanced;
7546               if (*l == ')')
7547                 --paren_not_balanced;
7548             }
7549           l++;
7550         }
7551       if (l != token_start)
7552         {                       /* Yes, we've read in another operand.  */
7553           unsigned int operand_ok;
7554           this_operand = i.operands++;
7555           if (i.operands > MAX_OPERANDS)
7556             {
7557               as_bad (_("spurious operands; (%d operands/instruction max)"),
7558                       MAX_OPERANDS);
7559               return NULL;
7560             }
7561           i.types[this_operand].bitfield.unspecified = 1;
7562
               /* Diagnose this before temporarily terminating the operand,
                  so that the error return doesn't leave END_STRING_AND_SAVE ()
                  without its matching RESTORE_END_STRING ().  */
7563           if (i.mem_operands > 1)
7564             {
7565               as_bad (_("too many memory references for `%s'"),
7566                       mnemonic);
7567               return NULL;
7568             }
7569
7570           /* Now parse operand adding info to 'i' as we go along.  */
7571           END_STRING_AND_SAVE (l);
7572           if (intel_syntax)
7573             operand_ok =
7574               i386_intel_operand (token_start,
7575                                   intel_float_operand (mnemonic));
7576           else
7577             operand_ok = i386_att_operand (token_start);
7578
7579           RESTORE_END_STRING (l);
7580           if (!operand_ok)
7581             return NULL;
7582         }
7583       else
7584         {
7585           if (expecting_operand)
7586             {
                 /* Also the target of the goto near the end of the loop.  */
7587             expecting_operand_after_comma:
7588               as_bad (_("expecting operand after ','; got nothing"));
7589               return NULL;
7590             }
7591           if (*l == ',')
7592             {
7593               as_bad (_("expecting operand before ','; got nothing"));
7594               return NULL;
7595             }
7596         }
7597
7598       /* Now *l must be either ',' or END_OF_INSN.  */
7599       if (*l == ',')
7600         {
7601           if (*++l == END_OF_INSN)
7602             {
7603               /* Just skip it, if it's \n complain.  */
7604               goto expecting_operand_after_comma;
7605             }
7606           expecting_operand = 1;
7607         }
7608     }
7609   return l;
7610 }
7611
7612 /* Exchange all per-operand state recorded in `i' between the operand
7613    slots XCHG1 and XCHG2, and redirect a recorded mask or broadcast that
7614    names one of the two slots to the other one.  */
7615 static void
7616 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
7617 {
7618   /* Set slot XCHG1 aside.  The flags and imm_bits entries travel in
7619      plain unsigned ints, whatever the element types of the arrays
7620      are.  */
7621   const i386_operand_type saved_type = i.types[xchg1];
7622   const unsigned int saved_flags = i.flags[xchg1];
7623   const union i386_op saved_op = i.op[xchg1];
7624   const enum bfd_reloc_code_real saved_reloc = i.reloc[xchg1];
7625   const unsigned int saved_imm_bits = i.imm_bits[xchg1];
7626
7627   /* Slot XCHG2 moves into slot XCHG1 ...  */
7628   i.types[xchg1] = i.types[xchg2];
7629   i.flags[xchg1] = i.flags[xchg2];
7630   i.op[xchg1] = i.op[xchg2];
7631   i.reloc[xchg1] = i.reloc[xchg2];
7632   i.imm_bits[xchg1] = i.imm_bits[xchg2];
7633
7634   /* ... and what was set aside lands in slot XCHG2.  */
7635   i.types[xchg2] = saved_type;
7636   i.flags[xchg2] = saved_flags;
7637   i.op[xchg2] = saved_op;
7638   i.reloc[xchg2] = saved_reloc;
7639   i.imm_bits[xchg2] = saved_imm_bits;
7640
7641   /* If a mask register is recorded and its operand index is one of the
7642      two slots, make the index follow the swap.  */
7643   if (i.mask.reg
7644       && (i.mask.operand == xchg1 || i.mask.operand == xchg2))
7645     i.mask.operand = i.mask.operand == xchg1 ? xchg2 : xchg1;
7646
7647   /* Same for a recorded broadcast (one with either its type or its
7648      bytes field set).  */
7649   if ((i.broadcast.type || i.broadcast.bytes)
7650       && (i.broadcast.operand == xchg1
7651           || i.broadcast.operand == xchg2))
7652     i.broadcast.operand = (i.broadcast.operand == xchg1
7653                            ? xchg2 : xchg1);
7654 }
7655
7656 /* Reverse the order of the operands recorded in `i': the outermost pair
7657    is always exchanged, and with 4 or 5 operands so is the next pair in.
7658    Any operand count other than 2 ... 5 is treated as an internal error.
7659    When i.mem_operands is 2, i.seg[0] and i.seg[1] trade places as well.  */
7660 static void
7661 swap_operands (void)
7662 {
7663   /* Only 2 to 5 operands can be reversed.  */
7664   if (i.operands < 2 || i.operands > 5)
7665     abort ();
7666
7667   /* Inner pair first; a middle operand (3 or 5 operands) stays put.  */
7668   if (i.operands >= 4)
7669     swap_2_operands (1, i.operands - 2);
7670   swap_2_operands (0, i.operands - 1);
7671
7672   /* With two memory operands the i.seg[] entries swap as well.  */
7673   if (i.mem_operands == 2)
7674     {
7675       const reg_entry *const first_seg = i.seg[0];
7676
7677       i.seg[0] = i.seg[1];
7678       i.seg[1] = first_seg;
7679     }
7680 }
7681
7682 /* Try to ensure constant immediates are represented in the smallest
7683    opcode possible.

        To that end an operand size is guessed first: from i.suffix if set,
        else from the last operand of class Reg, else from the current mode
        together with any data size / REX.W prefix.  Constant immediates then
        get additional Imm* size bits set in i.types[], based on the guess
        and on the value itself (smallest_imm_type ()).  Any other immediate
        expression gets restricted to the guessed size, provided at least one
        of the current templates accepts that size for the operand.  */
7684 static void
7685 optimize_imm (void)
7686 {
7687   char guess_suffix = 0;
7688   int op;
7689
7690   if (i.suffix)
7691     guess_suffix = i.suffix;
7692   else if (i.reg_operands)
7693     {
7694       /* Figure out a suffix from the last register operand specified.
7695          We can't do this properly yet, i.e. excluding special register
7696          instances, but the following works for instructions with
7697          immediates.  In any case, we can't set i.suffix yet.  */
7698       for (op = i.operands; --op >= 0;)
7699         if (i.types[op].bitfield.class != Reg)
7700           continue;
7701         else if (i.types[op].bitfield.byte)
7702           {
7703             guess_suffix = BYTE_MNEM_SUFFIX;
7704             break;
7705           }
7706         else if (i.types[op].bitfield.word)
7707           {
7708             guess_suffix = WORD_MNEM_SUFFIX;
7709             break;
7710           }
7711         else if (i.types[op].bitfield.dword)
7712           {
7713             guess_suffix = LONG_MNEM_SUFFIX;
7714             break;
7715           }
7716         else if (i.types[op].bitfield.qword)
7717           {
7718             guess_suffix = QWORD_MNEM_SUFFIX;
7719             break;
7720           }
7721     }
       /* 16-bit code without, or other code with, a data size prefix that
          isn't accompanied by REX.W.  */
7722   else if ((flag_code == CODE_16BIT)
7723             ^ (i.prefix[DATA_PREFIX] != 0 && !(i.prefix[REX_PREFIX] & REX_W)))
7724     guess_suffix = WORD_MNEM_SUFFIX;
7725   else if (flag_code != CODE_64BIT
7726            || (!(i.prefix[REX_PREFIX] & REX_W)
7727                /* A more generic (but also more involved) way of dealing
7728                   with the special case(s) would be to go look for
7729                   DefaultSize attributes on any of the templates.  */
7730                && current_templates.start->mnem_off != MN_push))
7731     guess_suffix = LONG_MNEM_SUFFIX;
7732
       /* guess_suffix is still 0 here when nothing above applied, in which
          case neither switch below selects a size.  */
7733   for (op = i.operands; --op >= 0;)
7734     if (operand_type_check (i.types[op], imm))
7735       {
7736         switch (i.op[op].imms->X_op)
7737           {
7738           case O_constant:
7739             /* If a suffix is given, this operand may be shortened.  */
7740             switch (guess_suffix)
7741               {
7742               case LONG_MNEM_SUFFIX:
7743                 i.types[op].bitfield.imm32 = 1;
7744                 i.types[op].bitfield.imm64 = 1;
7745                 break;
7746               case WORD_MNEM_SUFFIX:
7747                 i.types[op].bitfield.imm16 = 1;
7748                 i.types[op].bitfield.imm32 = 1;
7749                 i.types[op].bitfield.imm32s = 1;
7750                 i.types[op].bitfield.imm64 = 1;
7751                 break;
7752               case BYTE_MNEM_SUFFIX:
7753                 i.types[op].bitfield.imm8 = 1;
7754                 i.types[op].bitfield.imm8s = 1;
7755                 i.types[op].bitfield.imm16 = 1;
7756                 i.types[op].bitfield.imm32 = 1;
7757                 i.types[op].bitfield.imm32s = 1;
7758                 i.types[op].bitfield.imm64 = 1;
7759                 break;
7760               }
7761
7762             /* If this operand is at most 16 bits, convert it
7763                to a signed 16 bit number before trying to see
7764                whether it will fit in an even smaller size.
7765                This allows a 16-bit operand such as $0xffe0 to
7766                be recognised as within Imm8S range.  */
7767             if ((i.types[op].bitfield.imm16)
7768                 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
7769               {
                   /* Sign-extend from bit 15.  */
7770                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
7771                                                 ^ 0x8000) - 0x8000);
7772               }
7773 #ifdef BFD64
7774             /* Store 32-bit immediate in 64-bit for 64-bit BFD.  */
7775             if ((i.types[op].bitfield.imm32)
7776                 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
7777               {
                   /* Sign-extend from bit 31.  */
7778                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
7779                                                 ^ ((offsetT) 1 << 31))
7780                                                - ((offsetT) 1 << 31));
7781               }
7782 #endif
                 /* Add the size bits smallest_imm_type () reports for the
                    (possibly just sign-extended) value.  */
7783             i.types[op]
7784               = operand_type_or (i.types[op],
7785                                  smallest_imm_type (i.op[op].imms->X_add_number));
7786
7787             /* We must avoid matching of Imm32 templates when 64bit
7788                only immediate is available.  */
7789             if (guess_suffix == QWORD_MNEM_SUFFIX)
7790               i.types[op].bitfield.imm32 = 0;
7791             break;
7792
               /* Not expected for an immediate operand; treated as an
                  internal error.  */
7793           case O_absent:
7794           case O_register:
7795             abort ();
7796
7797             /* Symbols and expressions.  */
7798           default:
7799             /* Convert symbolic operand to proper sizes for matching, but don't
7800                prevent matching a set of insns that only supports sizes other
7801                than those matching the insn suffix.  */
7802             {
7803               i386_operand_type mask, allowed;
7804               const insn_template *t = current_templates.start;
7805
7806               operand_type_set (&mask, 0);
7807               switch (guess_suffix)
7808                 {
7809                 case QWORD_MNEM_SUFFIX:
7810                   mask.bitfield.imm64 = 1;
7811                   mask.bitfield.imm32s = 1;
7812                   break;
7813                 case LONG_MNEM_SUFFIX:
7814                   mask.bitfield.imm32 = 1;
7815                   break;
7816                 case WORD_MNEM_SUFFIX:
7817                   mask.bitfield.imm16 = 1;
7818                   break;
7819                 case BYTE_MNEM_SUFFIX:
7820                   mask.bitfield.imm8 = 1;
7821                   break;
7822                 default:
7823                   break;
7824                 }
7825
                   /* Collect in ALLOWED the MASK bits that any of the current
                      templates has set for this operand.  */
7826               allowed = operand_type_and (t->operand_types[op], mask);
7827               while (++t < current_templates.end)
7828                 {
7829                   allowed = operand_type_or (allowed, t->operand_types[op]);
7830                   allowed = operand_type_and (allowed, mask);
7831                 }
7832
                   /* Restrict the operand to MASK only if some template has
                      at least one of those bits; otherwise leave it alone.  */
7833               if (!operand_type_all_zero (&allowed))
7834                 i.types[op] = operand_type_and (i.types[op], mask);
7835             }
7836             break;
7837           }
7838       }
7839 }
7840
7841 /* Try to use the smallest displacement type too.  */
7842 static bool
7843 optimize_disp (const insn_template *t)
7844 {
7845   unsigned int op;
7846
7847   if (!want_disp32 (t)
7848       && (!t->opcode_modifier.jump
7849           || i.jumpabsolute || i.types[0].bitfield.baseindex))
7850     {
7851       for (op = 0; op < i.operands; ++op)
7852         {
7853           const expressionS *exp = i.op[op].disps;
7854
7855           if (!operand_type_check (i.types[op], disp))
7856             continue;
7857
7858           if (exp->X_op != O_constant)
7859             continue;
7860
7861           /* Since displacement is signed extended to 64bit, don't allow
7862              disp32 if it is out of range.  */
7863           if (fits_in_signed_long (exp->X_add_number))
7864             continue;
7865
7866           i.types[op].bitfield.disp32 = 0;
7867           if (i.types[op].bitfield.baseindex)
7868             {
7869               as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
7870                       (uint64_t) exp->X_add_number);
7871               return false;
7872             }
7873         }
7874     }
7875
7876   /* Don't optimize displacement for movabs since it only takes 64bit
7877      displacement.  */
7878   if (i.disp_encoding > disp_encoding_8bit
7879       || (flag_code == CODE_64BIT && t->mnem_off == MN_movabs))
7880     return true;
7881
7882   for (op = i.operands; op-- > 0;)
7883     if (operand_type_check (i.types[op], disp))
7884       {
7885         if (i.op[op].disps->X_op == O_constant)
7886           {
7887             offsetT op_disp = i.op[op].disps->X_add_number;
7888
7889             if (!op_disp && i.types[op].bitfield.baseindex)
7890               {
7891                 i.types[op] = operand_type_and_not (i.types[op], anydisp);
7892                 i.op[op].disps = NULL;
7893                 i.disp_operands--;
7894                 continue;
7895               }
7896
7897             if (i.types[op].bitfield.disp16
7898                 && fits_in_unsigned_word (op_disp))
7899               {
7900                 /* If this operand is at most 16 bits, convert
7901                    to a signed 16 bit number and don't use 64bit
7902                    displacement.  */
7903                 op_disp = ((op_disp ^ 0x8000) - 0x8000);
7904                 i.types[op].bitfield.disp64 = 0;
7905               }
7906
7907 #ifdef BFD64
7908             /* Optimize 64-bit displacement to 32-bit for 64-bit BFD.  */
7909             if ((flag_code != CODE_64BIT
7910                  ? i.types[op].bitfield.disp32
7911                  : want_disp32 (t)
7912                    && (!t->opcode_modifier.jump
7913                        || i.jumpabsolute || i.types[op].bitfield.baseindex))
7914                 && fits_in_unsigned_long (op_disp))
7915               {
7916                 /* If this operand is at most 32 bits, convert
7917                    to a signed 32 bit number and don't use 64bit
7918                    displacement.  */
7919                 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
7920                 i.types[op].bitfield.disp64 = 0;
7921                 i.types[op].bitfield.disp32 = 1;
7922               }
7923
7924             if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
7925               {
7926                 i.types[op].bitfield.disp64 = 0;
7927                 i.types[op].bitfield.disp32 = 1;
7928               }
7929 #endif
7930             if ((i.types[op].bitfield.disp32
7931                  || i.types[op].bitfield.disp16)
7932                 && fits_in_disp8 (op_disp))
7933               i.types[op].bitfield.disp8 = 1;
7934
7935             i.op[op].disps->X_add_number = op_disp;
7936           }
7937         else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
7938                  || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
7939           {
7940             fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
7941                          i.op[op].disps, 0, i.reloc[op]);
7942             i.types[op] = operand_type_and_not (i.types[op], anydisp);
7943           }
7944         else
7945           /* We only support 64bit displacement on constants.  */
7946           i.types[op].bitfield.disp64 = 0;
7947       }
7948
7949   return true;
7950 }
7951
7952 /* Return 1 if there is a match in broadcast bytes between operand
7953    GIVEN and instruction template T.   */
7954
7955 static INLINE int
7956 match_broadcast_size (const insn_template *t, unsigned int given)
7957 {
7958   return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
7959            && i.types[given].bitfield.byte)
7960           || (t->opcode_modifier.broadcast == WORD_BROADCAST
7961               && i.types[given].bitfield.word)
7962           || (t->opcode_modifier.broadcast == DWORD_BROADCAST
7963               && i.types[given].bitfield.dword)
7964           || (t->opcode_modifier.broadcast == QWORD_BROADCAST
7965               && i.types[given].bitfield.qword));
7966 }
7967
/* Check if operands are valid for the instruction.

   Validates the vector related aspects of the parsed insn (file-global
   `i') against instruction template T: implied AVX512VL / AVX2
   requirements, (V)SIB index registers, default mask use, distinctness
   constraints on registers, broadcast, masking, RC/SAE, Imm4 operands and
   EVEX compressed Disp8.

   Returns 0 if T is acceptable and 1 if not; every path returning 1 sets
   i.error first.  Besides i.error, the function may update i.memshift
   and the Disp8 bit of displacement operands, may clear i.types[0] for
   5-operand Imm4 insns, and may issue warnings for VSIB register
   overlaps.  */

static int
check_VecOperands (const insn_template *t)
{
  unsigned int op;
  i386_cpu_flags cpu;

  /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
     any one operand are implicitly requiring AVX512VL support if the actual
     operand size is YMMword or XMMword.  Since this function runs after
     template matching, there's no need to check for YMMword/XMMword in
     the template.  */
  cpu = cpu_flags_and (cpu_flags_from_attr (t->cpu), avx512);
  if (!cpu_flags_all_zero (&cpu)
      && !is_cpu (t, CpuAVX512VL)
      && !cpu_arch_flags.bitfield.cpuavx512vl
      && (!t->opcode_modifier.vex || need_evex_encoding (t)))
    {
      for (op = 0; op < t->operands; ++op)
        {
          if (t->operand_types[op].bitfield.zmmword
              && (i.types[op].bitfield.ymmword
                  || i.types[op].bitfield.xmmword))
            {
              i.error = operand_size_mismatch;
              return 1;
            }
        }
    }

  /* Somewhat similarly, templates specifying both AVX and AVX2 are
     requiring AVX2 support if the actual operand size is YMMword.  */
  if (maybe_cpu (t, CpuAVX) && maybe_cpu (t, CpuAVX2)
      && !cpu_arch_flags.bitfield.cpuavx2)
    {
      for (op = 0; op < t->operands; ++op)
        {
          if (t->operand_types[op].bitfield.xmmword
              && i.types[op].bitfield.ymmword)
            {
              i.error = operand_size_mismatch;
              return 1;
            }
        }
    }

  /* Without VSIB byte, we can't have a vector register for index.  */
  if (!t->opcode_modifier.sib
      && i.index_reg
      && (i.index_reg->reg_type.bitfield.xmmword
          || i.index_reg->reg_type.bitfield.ymmword
          || i.index_reg->reg_type.bitfield.zmmword))
    {
      i.error = unsupported_vector_index_register;
      return 1;
    }

  /* Check if default mask is allowed.  Templates flagged NO_DEFAULT_MASK
     need a mask register to be specified, and it must not be register
     number 0.  */
  if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
      && (!i.mask.reg || i.mask.reg->reg_num == 0))
    {
      i.error = no_default_mask;
      return 1;
    }

  /* For VSIB byte, we need a vector register for index, and all vector
     registers must be distinct.  */
  if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
    {
      /* The index register's width has to match the VSIB flavor of the
         template.  */
      if (!i.index_reg
          || !((t->opcode_modifier.sib == VECSIB128
                && i.index_reg->reg_type.bitfield.xmmword)
               || (t->opcode_modifier.sib == VECSIB256
                   && i.index_reg->reg_type.bitfield.ymmword)
               || (t->opcode_modifier.sib == VECSIB512
                   && i.index_reg->reg_type.bitfield.zmmword)))
      {
        i.error = invalid_vsib_address;
        return 1;
      }

      gas_assert (i.reg_operands == 2 || i.mask.reg);
      if (i.reg_operands == 2 && !i.mask.reg)
        {
          /* Form with two register operands and no mask register:
             operands 0 and 2 are XMM/YMM registers which, together with
             the index register, must be pairwise distinct.  */
          gas_assert (i.types[0].bitfield.class == RegSIMD);
          gas_assert (i.types[0].bitfield.xmmword
                      || i.types[0].bitfield.ymmword);
          gas_assert (i.types[2].bitfield.class == RegSIMD);
          gas_assert (i.types[2].bitfield.xmmword
                      || i.types[2].bitfield.ymmword);
          /* NB: Both "return 0" statements below leave the function
             altogether, i.e. none of the checks further down are
             carried out on these paths.  */
          if (operand_check == check_none)
            return 0;
          if (register_number (i.op[0].regs)
              != register_number (i.index_reg)
              && register_number (i.op[2].regs)
                 != register_number (i.index_reg)
              && register_number (i.op[0].regs)
                 != register_number (i.op[2].regs))
            return 0;
          if (operand_check == check_error)
            {
              i.error = invalid_vector_register_set;
              return 1;
            }
          /* Only a warning was requested: emit it and carry on with the
             remaining checks.  */
          as_warn (_("mask, index, and destination registers should be distinct"));
        }
      else if (i.reg_operands == 1 && i.mask.reg)
        {
          /* Form with a mask register and a single register operand:
             if operand 1 is a vector register, it must differ from the
             index register.  */
          if (i.types[1].bitfield.class == RegSIMD
              && (i.types[1].bitfield.xmmword
                  || i.types[1].bitfield.ymmword
                  || i.types[1].bitfield.zmmword)
              && (register_number (i.op[1].regs)
                  == register_number (i.index_reg)))
            {
              if (operand_check == check_error)
                {
                  i.error = invalid_vector_register_set;
                  return 1;
                }
              if (operand_check != check_none)
                as_warn (_("index and destination registers should be distinct"));
            }
        }
    }

  /* For AMX instructions with 3 TMM register operands, all operands
      must be distinct.  */
  if (i.reg_operands == 3
      && t->operand_types[0].bitfield.tmmword
      && (i.op[0].regs == i.op[1].regs
          || i.op[0].regs == i.op[2].regs
          || i.op[1].regs == i.op[2].regs))
    {
      i.error = invalid_tmm_register_set;
      return 1;
    }

  /* Some special instructions require the destination to be distinct
     from the source registers.  */
  if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
    {
      /* The destination is the last operand.  */
      unsigned int dest_reg = i.operands - 1;

      know (i.operands >= 3);

      /* #UD if dest_reg == src1_reg or dest_reg == src2_reg.  The second
         source is compared only when there are more than two register
         operands.  */
      if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
          || (i.reg_operands > 2
              && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
        {
          i.error = invalid_dest_and_src_register_set;
          return 1;
        }
    }

  /* Check if broadcast is supported by the instruction and is applied
     to the memory operand.  */
  if (i.broadcast.type || i.broadcast.bytes)
    {
      i386_operand_type type, overlap;

      /* Check if specified broadcast is supported in this instruction,
         and its broadcast bytes match the memory operand.  */
      op = i.broadcast.operand;
      if (!t->opcode_modifier.broadcast
          || !(i.flags[op] & Operand_Mem)
          || (!i.types[op].bitfield.unspecified
              && !match_broadcast_size (t, op)))
        {
          /* Common failure exit for all broadcast related checks; also
             reached via goto from further down.  */
        bad_broadcast:
          i.error = unsupported_broadcast;
          return 1;
        }

      /* Build in TYPE the size bit matching the byte count returned by
         get_broadcast_bytes().  32- and 64-byte results are subject to
         the vector size currently permitted.  */
      operand_type_set (&type, 0);
      switch (get_broadcast_bytes (t, false))
        {
        case 2:
          type.bitfield.word = 1;
          break;
        case 4:
          type.bitfield.dword = 1;
          break;
        case 8:
          type.bitfield.qword = 1;
          break;
        case 16:
          type.bitfield.xmmword = 1;
          break;
        case 32:
          if (vector_size < VSZ256)
            goto bad_broadcast;
          type.bitfield.ymmword = 1;
          break;
        case 64:
          if (vector_size < VSZ512)
            goto bad_broadcast;
          type.bitfield.zmmword = 1;
          break;
        default:
          goto bad_broadcast;
        }

      /* The template has to permit that size for the operand.  When the
         template operand is a SIMD register type allowing more than one
         of the byte/word/dword/qword sizes, a match on a vector size
         alone doesn't count.  */
      overlap = operand_type_and (type, t->operand_types[op]);
      if (t->operand_types[op].bitfield.class == RegSIMD
          && t->operand_types[op].bitfield.byte
             + t->operand_types[op].bitfield.word
             + t->operand_types[op].bitfield.dword
             + t->operand_types[op].bitfield.qword > 1)
        {
          overlap.bitfield.xmmword = 0;
          overlap.bitfield.ymmword = 0;
          overlap.bitfield.zmmword = 0;
        }
      if (operand_type_all_zero (&overlap))
          goto bad_broadcast;

      /* If the template wants operand sizes checked, every other operand
         has to be consistent with the broadcast operand, which is
         represented by TYPE with BaseIndex set.  */
      if (t->opcode_modifier.checkoperandsize)
        {
          unsigned int j;

          type.bitfield.baseindex = 1;
          for (j = 0; j < i.operands; ++j)
            {
              if (j != op
                  && !operand_type_register_match(i.types[j],
                                                  t->operand_types[j],
                                                  type,
                                                  t->operand_types[op]))
                goto bad_broadcast;
            }
        }
    }
  /* If broadcast is supported in this instruction, we need to check if
     operand of one-element size isn't specified without broadcast.  */
  else if (t->opcode_modifier.broadcast && i.mem_operands)
    {
      /* Find memory operand.  */
      for (op = 0; op < i.operands; op++)
        if (i.flags[op] & Operand_Mem)
          break;
      gas_assert (op < i.operands);
      /* Check size of the memory operand.  */
      if (match_broadcast_size (t, op))
        {
          i.error = broadcast_needed;
          return 1;
        }
    }
  else
    op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning.  */

  /* Check if requested masking is supported.  */
  if (i.mask.reg)
    {
      if (!t->opcode_modifier.masking)
        {
          i.error = unsupported_masking;
          return 1;
        }

      /* Common rules for masking:
         - mask register destinations permit only zeroing-masking, without
           that actually being expressed by a {z} operand suffix or EVEX.z,
         - memory destinations allow only merging-masking,
         - scatter/gather insns (i.e. ones using vSIB) only allow merging-
           masking.  */
      if (i.mask.zeroing
          && (t->operand_types[t->operands - 1].bitfield.class == RegMask
              || (i.flags[t->operands - 1] & Operand_Mem)
              || t->opcode_modifier.sib))
        {
          i.error = unsupported_masking;
          return 1;
        }
    }

  /* Check if masking is applied to dest operand, i.e. the last one.  */
  if (i.mask.reg && (i.mask.operand != i.operands - 1))
    {
      i.error = mask_not_on_destination;
      return 1;
    }

  /* Check RC/SAE.  */
  if (i.rounding.type != rc_none)
    {
      /* The template has to allow SAE at all, whether the request is for
         something other than plain SAE has to agree with the template's
         StaticRounding attribute, and there must not be any memory
         operand.  */
      if (!t->opcode_modifier.sae
          || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
          || i.mem_operands)
        {
          i.error = unsupported_rc_sae;
          return 1;
        }

      /* Non-EVEX.LIG forms need to have a ZMM register as at least one
         operand.  */
      if (t->opcode_modifier.evex != EVEXLIG)
        {
          for (op = 0; op < t->operands; ++op)
            if (i.types[op].bitfield.zmmword)
              break;
          if (op >= t->operands)
            {
              i.error = operand_size_mismatch;
              return 1;
            }
        }
    }

  /* Check the special Imm4 cases; must be the first operand.  */
  if ((is_cpu (t, CpuXOP) && t->operands == 5)
      || (t->opcode_space == SPACE_0F3A
          && (t->base_opcode | 3) == 0x0b
          && (is_cpu (t, CpuAPX_F)
           || (t->opcode_modifier.sse2avx && t->opcode_modifier.evex
               && (!t->opcode_modifier.vex
                   || (i.encoding != encoding_default
                       && i.encoding != encoding_vex
                       && i.encoding != encoding_vex3))))))
    {
      /* The immediate has to be a constant accepted by fits_in_imm4().  */
      if (i.op[0].imms->X_op != O_constant
          || !fits_in_imm4 (i.op[0].imms->X_add_number))
        {
          i.error = bad_imm4;
          return 1;
        }

      /* Turn off Imm<N> so that update_imm won't complain.  */
      if (t->operands == 5)
        operand_type_set (&i.types[0], 0);
    }

  /* Check vector Disp8 operand.  This is done only for templates with a
     Disp8MemShift attribute which are, or need to be, EVEX encoded, and
     only if no displacement encoding wider than 8 bits was requested.  */
  if (t->opcode_modifier.disp8memshift
      && (!t->opcode_modifier.vex
          || need_evex_encoding (t))
      && i.disp_encoding <= disp_encoding_8bit)
    {
      /* Determine the shift count (i.memshift) to be used when checking
         whether a displacement fits Disp8.  */
      if (i.broadcast.type || i.broadcast.bytes)
        /* With broadcast the template's broadcast attribute, less one,
           is used as shift count.  */
        i.memshift = t->opcode_modifier.broadcast - 1;
      else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
        /* The template specifies a fixed shift count.  */
        i.memshift = t->opcode_modifier.disp8memshift;
      else
        {
          /* The shift count depends on the vector length, which is
             derived from the operands.  TYPE, if set, determines the
             result; FALLBACK is consulted only if nothing else did.  */
          const i386_operand_type *type = NULL, *fallback = NULL;

          i.memshift = 0;
          for (op = 0; op < i.operands; op++)
            if (i.flags[op] & Operand_Mem)
              {
                if (t->opcode_modifier.evex == EVEXLIG)
                  /* Shift count is 2, or 3 with a 'q' suffix.  */
                  i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
                else if (t->operand_types[op].bitfield.xmmword
                         + t->operand_types[op].bitfield.ymmword
                         + t->operand_types[op].bitfield.zmmword <= 1)
                  /* The template permits at most one vector size for the
                     memory operand.  */
                  type = &t->operand_types[op];
                else if (!i.types[op].bitfield.unspecified)
                  /* The memory operand had its size specified.  */
                  type = &i.types[op];
                else /* Ambiguities get resolved elsewhere.  */
                  fallback = &t->operand_types[op];
              }
            else if (i.types[op].bitfield.class == RegSIMD
                     && t->opcode_modifier.evex != EVEXLIG)
              {
                /* Track the widest vector register seen so far.  */
                if (i.types[op].bitfield.zmmword)
                  i.memshift = 6;
                else if (i.types[op].bitfield.ymmword && i.memshift < 5)
                  i.memshift = 5;
                else if (i.types[op].bitfield.xmmword && i.memshift < 4)
                  i.memshift = 4;
              }

          if (!type && !i.memshift)
            type = fallback;
          /* A type determined from the memory operand overrides what the
             register operands suggested.  */
          if (type)
            {
              if (type->bitfield.zmmword)
                i.memshift = 6;
              else if (type->bitfield.ymmword)
                i.memshift = 5;
              else if (type->bitfield.xmmword)
                i.memshift = 4;
            }

          /* For the check in fits_in_disp8().  */
          if (i.memshift == 0)
            i.memshift = -1;
        }

      /* Set or clear Disp8 on constant displacements.  NB: When a
         displacement fits, the function returns right away with
         i.memshift left as computed above; otherwise i.memshift is
         reset to zero below.  */
      for (op = 0; op < i.operands; op++)
        if (operand_type_check (i.types[op], disp)
            && i.op[op].disps->X_op == O_constant)
          {
            if (fits_in_disp8 (i.op[op].disps->X_add_number))
              {
                i.types[op].bitfield.disp8 = 1;
                return 0;
              }
            i.types[op].bitfield.disp8 = 0;
          }
    }

  i.memshift = 0;

  return 0;
}
8377
8378 /* Check if encoding requirements are met by the instruction.  */
8379
8380 static int
8381 VEX_check_encoding (const insn_template *t)
8382 {
8383   if (i.encoding == encoding_error)
8384     {
8385       i.error = unsupported;
8386       return 1;
8387     }
8388
8389   /* Vector size restrictions.  */
8390   if ((vector_size < VSZ512
8391        && t->opcode_modifier.evex == EVEX512)
8392       || (vector_size < VSZ256
8393           && (t->opcode_modifier.evex == EVEX256
8394               || t->opcode_modifier.vex == VEX256)))
8395     {
8396       i.error = unsupported_vector_size;
8397       return 1;
8398     }
8399
8400   switch (i.encoding)
8401     {
8402     case encoding_default:
8403       break;
8404
8405     case encoding_vex:
8406     case encoding_vex3:
8407       /* This instruction must be encoded with VEX prefix.  */
8408       if (!t->opcode_modifier.vex)
8409         {
8410           i.error = no_vex_encoding;
8411           return 1;
8412         }
8413       break;
8414
8415     case encoding_evex:
8416     case encoding_evex512:
8417       /* This instruction must be encoded with EVEX prefix.  */
8418       if (!t->opcode_modifier.evex)
8419         {
8420           i.error = no_evex_encoding;
8421           return 1;
8422         }
8423       break;
8424
8425     case encoding_egpr:
8426       /* This instruction must be encoded with REX2 or EVEX prefix.  */
8427       if (t->opcode_modifier.vex && !t->opcode_modifier.evex)
8428         {
8429           i.error = no_evex_encoding;
8430           return 1;
8431         }
8432       break;
8433
8434     default:
8435       abort ();
8436     }
8437
8438   return 0;
8439 }
8440
8441 /* Check if Egprs operands are valid for the instruction.  */
8442
8443 static bool
8444 check_EgprOperands (const insn_template *t)
8445 {
8446   if (!t->opcode_modifier.noegpr)
8447     return false;
8448
8449   for (unsigned int op = 0; op < i.operands; op++)
8450     {
8451       if (i.types[op].bitfield.class != Reg)
8452         continue;
8453
8454       if (i.op[op].regs->reg_flags & RegRex2)
8455         {
8456           i.error = register_type_mismatch;
8457           return true;
8458         }
8459     }
8460
8461   if ((i.index_reg && (i.index_reg->reg_flags & RegRex2))
8462       || (i.base_reg && (i.base_reg->reg_flags & RegRex2)))
8463     {
8464       i.error = unsupported_EGPR_for_addressing;
8465       return true;
8466     }
8467
8468   /* Check if pseudo prefix {rex2} is valid.  */
8469   if (i.rex2_encoding && !t->opcode_modifier.sse2avx)
8470     {
8471       i.error = invalid_pseudo_prefix;
8472       return true;
8473     }
8474
8475   return false;
8476 }
8477
8478 /* Check if APX operands are valid for the instruction.  */
8479 static bool
8480 check_APX_operands (const insn_template *t)
8481 {
8482   /* Push2* and Pop2* cannot use RSP and Pop2* cannot pop two same registers.
8483    */
8484   switch (t->mnem_off)
8485     {
8486     case MN_pop2:
8487     case MN_pop2p:
8488       if (register_number (i.op[0].regs) == register_number (i.op[1].regs))
8489         {
8490           i.error = invalid_dest_register_set;
8491           return 1;
8492         }
8493     /* fall through */
8494     case MN_push2:
8495     case MN_push2p:
8496       if (register_number (i.op[0].regs) == 4
8497           || register_number (i.op[1].regs) == 4)
8498         {
8499           i.error = unsupported_rsp_register;
8500           return 1;
8501         }
8502       break;
8503     }
8504   return 0;
8505 }
8506
8507 /* Check if the instruction use the REX registers or REX prefix.  */
8508 static bool
8509 check_Rex_required (void)
8510 {
8511   for (unsigned int op = 0; op < i.operands; op++)
8512     {
8513       if (i.types[op].bitfield.class != Reg)
8514         continue;
8515
8516       if (i.op[op].regs->reg_flags & (RegRex | RegRex64))
8517         return true;
8518     }
8519
8520   if ((i.index_reg && (i.index_reg->reg_flags & (RegRex | RegRex64)))
8521       || (i.base_reg && (i.base_reg->reg_flags & (RegRex | RegRex64))))
8522     return true;
8523
8524   /* Check pseudo prefix {rex} are valid.  */
8525   return i.rex_encoding;
8526 }
8527
8528 /* Optimize APX NDD insns to legacy insns.  */
8529 static unsigned int
8530 can_convert_NDD_to_legacy (const insn_template *t)
8531 {
8532   unsigned int match_dest_op = ~0;
8533
8534   if (!i.has_nf && i.reg_operands >= 2)
8535     {
8536       unsigned int dest = i.operands - 1;
8537       unsigned int src1 = i.operands - 2;
8538       unsigned int src2 = (i.operands > 3) ? i.operands - 3 : 0;
8539
8540       if (i.types[src1].bitfield.class == Reg
8541           && i.op[src1].regs == i.op[dest].regs)
8542         match_dest_op = src1;
8543       /* If the first operand is the same as the third operand,
8544          these instructions need to support the ability to commutative
8545          the first two operands and still not change the semantics in order
8546          to be optimized.  */
8547       else if (optimize > 1
8548                && t->opcode_modifier.commutative
8549                && i.types[src2].bitfield.class == Reg
8550                && i.op[src2].regs == i.op[dest].regs)
8551         match_dest_op = src2;
8552     }
8553   return match_dest_op;
8554 }
8555
8556 /* Helper function for the progress() macro in match_template().  */
8557 static INLINE enum i386_error progress (enum i386_error new,
8558                                         enum i386_error last,
8559                                         unsigned int line, unsigned int *line_p)
8560 {
8561   if (line <= *line_p)
8562     return last;
8563   *line_p = line;
8564   return new;
8565 }
8566
8567 static const insn_template *
8568 match_template (char mnem_suffix)
8569 {
8570   /* Points to template once we've found it.  */
8571   const insn_template *t;
8572   i386_operand_type overlap0, overlap1, overlap2, overlap3;
8573   i386_operand_type overlap4;
8574   unsigned int found_reverse_match;
8575   i386_operand_type operand_types [MAX_OPERANDS];
8576   int addr_prefix_disp;
8577   unsigned int j, size_match, check_register, errline = __LINE__;
8578   enum i386_error specific_error = number_of_operands_mismatch;
8579 #define progress(err) progress (err, specific_error, __LINE__, &errline)
8580
8581 #if MAX_OPERANDS != 5
8582 # error "MAX_OPERANDS must be 5."
8583 #endif
8584
8585   found_reverse_match = 0;
8586   addr_prefix_disp = -1;
8587
8588   for (t = current_templates.start; t < current_templates.end; t++)
8589     {
8590       addr_prefix_disp = -1;
8591       found_reverse_match = 0;
8592
8593       /* Must have right number of operands.  */
8594       if (i.operands != t->operands)
8595         continue;
8596
8597       /* Skip SSE2AVX templates when inapplicable.  */
8598       if (t->opcode_modifier.sse2avx
8599           && (!sse2avx || i.prefix[DATA_PREFIX]))
8600         {
8601           /* Another non-SSE2AVX template has to follow.  */
8602           gas_assert (t + 1 < current_templates.end);
8603           continue;
8604         }
8605
8606       /* Check processor support.  */
8607       specific_error = progress (unsupported);
8608       if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
8609         continue;
8610
8611       /* Check AT&T mnemonic.   */
8612       specific_error = progress (unsupported_with_intel_mnemonic);
8613       if (!intel_syntax && intel_mnemonic
8614           && t->opcode_modifier.dialect == ATT_MNEMONIC)
8615         continue;
8616
8617       /* Check AT&T/Intel syntax.  */
8618       specific_error = progress (unsupported_syntax);
8619       if (intel_syntax
8620            ? t->opcode_modifier.dialect >= ATT_SYNTAX
8621            : t->opcode_modifier.dialect == INTEL_SYNTAX)
8622         continue;
8623
8624       /* Check NF support.  */
8625       specific_error = progress (unsupported_nf);
8626       if (i.has_nf && !t->opcode_modifier.nf)
8627         continue;
8628
8629       /* Check Intel64/AMD64 ISA.   */
8630       switch (isa64)
8631         {
8632         default:
8633           /* Default: Don't accept Intel64.  */
8634           if (t->opcode_modifier.isa64 == INTEL64)
8635             continue;
8636           break;
8637         case amd64:
8638           /* -mamd64: Don't accept Intel64 and Intel64 only.  */
8639           if (t->opcode_modifier.isa64 >= INTEL64)
8640             continue;
8641           break;
8642         case intel64:
8643           /* -mintel64: Don't accept AMD64.  */
8644           if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
8645             continue;
8646           break;
8647         }
8648
8649       /* Check the suffix.  */
8650       specific_error = progress (invalid_instruction_suffix);
8651       if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
8652           || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
8653           || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
8654           || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
8655           || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
8656         continue;
8657
8658       specific_error = progress (operand_size_mismatch);
8659       size_match = operand_size_match (t);
8660       if (!size_match)
8661         continue;
8662
8663       /* This is intentionally not
8664
8665          if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
8666
8667          as the case of a missing * on the operand is accepted (perhaps with
8668          a warning, issued further down).  */
8669       specific_error = progress (operand_type_mismatch);
8670       if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
8671         continue;
8672
8673       /* In Intel syntax, normally we can check for memory operand size when
8674          there is no mnemonic suffix.  But jmp and call have 2 different
8675          encodings with Dword memory operand size.  Skip the "near" one
8676          (permitting a register operand) when "far" was requested.  */
8677       if (i.far_branch
8678           && t->opcode_modifier.jump == JUMP_ABSOLUTE
8679           && t->operand_types[0].bitfield.class == Reg)
8680         continue;
8681
8682       for (j = 0; j < MAX_OPERANDS; j++)
8683         operand_types[j] = t->operand_types[j];
8684
8685       /* In general, don't allow 32-bit operands on pre-386.  */
8686       specific_error = progress (mnem_suffix ? invalid_instruction_suffix
8687                                              : operand_size_mismatch);
8688       j = i.imm_operands + (t->operands > i.imm_operands + 1);
8689       if (i.suffix == LONG_MNEM_SUFFIX
8690           && !cpu_arch_flags.bitfield.cpui386
8691           && (intel_syntax
8692               ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
8693                  && !intel_float_operand (insn_name (t)))
8694               : intel_float_operand (insn_name (t)) != 2)
8695           && (t->operands == i.imm_operands
8696               || (operand_types[i.imm_operands].bitfield.class != RegMMX
8697                && operand_types[i.imm_operands].bitfield.class != RegSIMD
8698                && operand_types[i.imm_operands].bitfield.class != RegMask)
8699               || (operand_types[j].bitfield.class != RegMMX
8700                   && operand_types[j].bitfield.class != RegSIMD
8701                   && operand_types[j].bitfield.class != RegMask))
8702           && !t->opcode_modifier.sib)
8703         continue;
8704
8705       /* Do not verify operands when there are none.  */
8706       if (!t->operands)
8707         {
8708           if (VEX_check_encoding (t))
8709             {
8710               specific_error = progress (i.error);
8711               continue;
8712             }
8713
8714           /* Check if pseudo prefix {rex2} is valid.  */
8715           if (t->opcode_modifier.noegpr && i.rex2_encoding)
8716             {
8717               specific_error = progress (invalid_pseudo_prefix);
8718               continue;
8719             }
8720
8721           /* We've found a match; break out of loop.  */
8722           break;
8723         }
8724
8725       if (!t->opcode_modifier.jump
8726           || t->opcode_modifier.jump == JUMP_ABSOLUTE)
8727         {
8728           /* There should be only one Disp operand.  */
8729           for (j = 0; j < MAX_OPERANDS; j++)
8730             if (operand_type_check (operand_types[j], disp))
8731               break;
8732           if (j < MAX_OPERANDS)
8733             {
8734               bool override = (i.prefix[ADDR_PREFIX] != 0);
8735
8736               addr_prefix_disp = j;
8737
8738               /* Address size prefix will turn Disp64 operand into Disp32 and
8739                  Disp32/Disp16 one into Disp16/Disp32 respectively.  */
8740               switch (flag_code)
8741                 {
8742                 case CODE_16BIT:
8743                   override = !override;
8744                   /* Fall through.  */
8745                 case CODE_32BIT:
8746                   if (operand_types[j].bitfield.disp32
8747                       && operand_types[j].bitfield.disp16)
8748                     {
8749                       operand_types[j].bitfield.disp16 = override;
8750                       operand_types[j].bitfield.disp32 = !override;
8751                     }
8752                   gas_assert (!operand_types[j].bitfield.disp64);
8753                   break;
8754
8755                 case CODE_64BIT:
8756                   if (operand_types[j].bitfield.disp64)
8757                     {
8758                       gas_assert (!operand_types[j].bitfield.disp32);
8759                       operand_types[j].bitfield.disp32 = override;
8760                       operand_types[j].bitfield.disp64 = !override;
8761                     }
8762                   operand_types[j].bitfield.disp16 = 0;
8763                   break;
8764                 }
8765             }
8766         }
8767
8768       /* We check register size if needed.  */
8769       if (t->opcode_modifier.checkoperandsize)
8770         {
8771           check_register = (1 << t->operands) - 1;
8772           if (i.broadcast.type || i.broadcast.bytes)
8773             check_register &= ~(1 << i.broadcast.operand);
8774         }
8775       else
8776         check_register = 0;
8777
8778       overlap0 = operand_type_and (i.types[0], operand_types[0]);
8779       switch (t->operands)
8780         {
8781         case 1:
8782           if (!operand_type_match (overlap0, i.types[0]))
8783             continue;
8784
8785           /* Allow the ModR/M encoding to be requested by using the {load} or
8786              {store} pseudo prefix on an applicable insn.  */
8787           if (!t->opcode_modifier.modrm
8788               && i.reg_operands == 1
8789               && ((i.dir_encoding == dir_encoding_load
8790                    && t->mnem_off != MN_pop)
8791                   || (i.dir_encoding == dir_encoding_store
8792                       && t->mnem_off != MN_push))
8793               /* Avoid BSWAP.  */
8794               && t->mnem_off != MN_bswap)
8795             continue;
8796           break;
8797
8798         case 2:
8799           /* xchg %eax, %eax is a special case. It is an alias for nop
8800              only in 32bit mode and we can use opcode 0x90.  In 64bit
8801              mode, we can't use 0x90 for xchg %eax, %eax since it should
8802              zero-extend %eax to %rax.  */
8803           if (t->base_opcode == 0x90
8804               && t->opcode_space == SPACE_BASE)
8805             {
8806               if (flag_code == CODE_64BIT
8807                   && i.types[0].bitfield.instance == Accum
8808                   && i.types[0].bitfield.dword
8809                   && i.types[1].bitfield.instance == Accum)
8810                 continue;
8811
8812               /* Allow the ModR/M encoding to be requested by using the
8813                  {load} or {store} pseudo prefix.  */
8814               if (i.dir_encoding == dir_encoding_load
8815                   || i.dir_encoding == dir_encoding_store)
8816                 continue;
8817             }
8818
8819           if (t->base_opcode == MOV_AX_DISP32
8820               && t->opcode_space == SPACE_BASE
8821               && t->mnem_off != MN_movabs)
8822             {
8823               /* Force 0x8b encoding for "mov foo@GOT, %eax".  */
8824               if (i.reloc[0] == BFD_RELOC_386_GOT32)
8825                 continue;
8826
8827               /* xrelease mov %eax, <disp> is another special case. It must not
8828                  match the accumulator-only encoding of mov.  */
8829               if (i.hle_prefix)
8830                 continue;
8831
8832               /* Allow the ModR/M encoding to be requested by using a suitable
8833                  {load} or {store} pseudo prefix.  */
8834               if (i.dir_encoding == (i.types[0].bitfield.instance == Accum
8835                                      ? dir_encoding_store
8836                                      : dir_encoding_load)
8837                   && !i.types[0].bitfield.disp64
8838                   && !i.types[1].bitfield.disp64)
8839                 continue;
8840             }
8841
8842           /* Allow the ModR/M encoding to be requested by using the {load} or
8843              {store} pseudo prefix on an applicable insn.  */
8844           if (!t->opcode_modifier.modrm
8845               && i.reg_operands == 1
8846               && i.imm_operands == 1
8847               && (i.dir_encoding == dir_encoding_load
8848                   || i.dir_encoding == dir_encoding_store)
8849               && t->opcode_space == SPACE_BASE)
8850             {
8851               if (t->base_opcode == 0xb0 /* mov $imm, %reg */
8852                   && i.dir_encoding == dir_encoding_store)
8853                 continue;
8854
8855               if ((t->base_opcode | 0x38) == 0x3c /* <alu> $imm, %acc */
8856                   && (t->base_opcode != 0x3c /* cmp $imm, %acc */
8857                       || i.dir_encoding == dir_encoding_load))
8858                 continue;
8859
8860               if (t->base_opcode == 0xa8 /* test $imm, %acc */
8861                   && i.dir_encoding == dir_encoding_load)
8862                 continue;
8863             }
8864           /* Fall through.  */
8865
8866         case 3:
8867           if (!(size_match & MATCH_STRAIGHT))
8868             goto check_reverse;
8869           /* Reverse direction of operands if swapping is possible in the first
8870              place (operands need to be symmetric) and
8871              - the load form is requested, and the template is a store form,
8872              - the store form is requested, and the template is a load form,
8873              - the non-default (swapped) form is requested.  */
8874           overlap1 = operand_type_and (operand_types[0], operand_types[1]);
8875
8876           j = i.operands - 1 - (t->opcode_space == SPACE_EVEXMAP4
8877                                 && t->opcode_modifier.vexvvvv);
8878
8879           if (t->opcode_modifier.d && i.reg_operands == i.operands
8880               && !operand_type_all_zero (&overlap1))
8881             switch (i.dir_encoding)
8882               {
8883               case dir_encoding_load:
8884                 if (operand_type_check (operand_types[j], anymem)
8885                     || t->opcode_modifier.regmem)
8886                   goto check_reverse;
8887                 break;
8888
8889               case dir_encoding_store:
8890                 if (!operand_type_check (operand_types[j], anymem)
8891                     && !t->opcode_modifier.regmem)
8892                   goto check_reverse;
8893                 break;
8894
8895               case dir_encoding_swap:
8896                 goto check_reverse;
8897
8898               case dir_encoding_default:
8899                 break;
8900               }
8901
8902           /* If we want store form, we skip the current load.  */
8903           if ((i.dir_encoding == dir_encoding_store
8904                || i.dir_encoding == dir_encoding_swap)
8905               && i.mem_operands == 0
8906               && t->opcode_modifier.load)
8907             continue;
8908           /* Fall through.  */
8909         case 4:
8910         case 5:
8911           overlap1 = operand_type_and (i.types[1], operand_types[1]);
8912           if (!operand_type_match (overlap0, i.types[0])
8913               || !operand_type_match (overlap1, i.types[1])
8914               || ((check_register & 3) == 3
8915                   && !operand_type_register_match (i.types[0],
8916                                                    operand_types[0],
8917                                                    i.types[1],
8918                                                    operand_types[1])))
8919             {
8920               specific_error = progress (i.error);
8921
8922               /* Check if other direction is valid ...  */
8923               if (!t->opcode_modifier.d)
8924                 continue;
8925
8926             check_reverse:
8927               if (!(size_match & MATCH_REVERSE))
8928                 continue;
8929               /* Try reversing direction of operands.  */
8930               j = is_cpu (t, CpuFMA4)
8931                   || is_cpu (t, CpuXOP)
8932                   || is_cpu (t, CpuAPX_F) ? 1 : i.operands - 1;
8933               overlap0 = operand_type_and (i.types[0], operand_types[j]);
8934               overlap1 = operand_type_and (i.types[j], operand_types[0]);
8935               overlap2 = operand_type_and (i.types[1], operand_types[1]);
8936               gas_assert (t->operands != 3 || !check_register
8937                           || is_cpu (t, CpuAPX_F));
8938               if (!operand_type_match (overlap0, i.types[0])
8939                   || !operand_type_match (overlap1, i.types[j])
8940                   || (t->operands == 3
8941                       && !operand_type_match (overlap2, i.types[1]))
8942                   || (check_register
8943                       && !operand_type_register_match (i.types[0],
8944                                                        operand_types[j],
8945                                                        i.types[j],
8946                                                        operand_types[0])))
8947                 {
8948                   /* Does not match either direction.  */
8949                   specific_error = progress (i.error);
8950                   continue;
8951                 }
8952               /* found_reverse_match holds which variant of D
8953                  we've found.  */
8954               if (!t->opcode_modifier.d)
8955                 found_reverse_match = 0;
8956               else if (operand_types[0].bitfield.tbyte)
8957                 {
8958                   if (t->opcode_modifier.operandconstraint != UGH)
8959                     found_reverse_match = Opcode_FloatD;
8960                   else
8961                     found_reverse_match = ~0;
8962                   /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped.  */
8963                   if ((t->extension_opcode & 4)
8964                       && (intel_syntax || intel_mnemonic))
8965                     found_reverse_match |= Opcode_FloatR;
8966                 }
8967               else if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP))
8968                 {
8969                   found_reverse_match = Opcode_VexW;
8970                   goto check_operands_345;
8971                 }
8972               else if (t->opcode_space == SPACE_EVEXMAP4
8973                        && t->opcode_modifier.w)
8974                 {
8975                   found_reverse_match = Opcode_D;
8976                   goto check_operands_345;
8977                 }
8978               else if (t->opcode_space != SPACE_BASE
8979                        && (t->opcode_space != SPACE_0F
8980                            /* MOV to/from CR/DR/TR, as an exception, follow
8981                               the base opcode space encoding model.  */
8982                            || (t->base_opcode | 7) != 0x27))
8983                 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
8984                                       ? Opcode_ExtD : Opcode_SIMD_IntD;
8985               else if (!t->opcode_modifier.commutative)
8986                 found_reverse_match = Opcode_D;
8987               else
8988                 found_reverse_match = ~0;
8989             }
8990           else
8991             {
8992               /* Found a forward 2 operand match here.  */
8993             check_operands_345:
8994               switch (t->operands)
8995                 {
8996                 case 5:
8997                   overlap4 = operand_type_and (i.types[4], operand_types[4]);
8998                   if (!operand_type_match (overlap4, i.types[4])
8999                       || !operand_type_register_match (i.types[3],
9000                                                        operand_types[3],
9001                                                        i.types[4],
9002                                                        operand_types[4]))
9003                     {
9004                       specific_error = progress (i.error);
9005                       continue;
9006                     }
9007                   /* Fall through.  */
9008                 case 4:
9009                   overlap3 = operand_type_and (i.types[3], operand_types[3]);
9010                   if (!operand_type_match (overlap3, i.types[3])
9011                       || ((check_register & 0xa) == 0xa
9012                           && !operand_type_register_match (i.types[1],
9013                                                             operand_types[1],
9014                                                             i.types[3],
9015                                                             operand_types[3]))
9016                       || ((check_register & 0xc) == 0xc
9017                           && !operand_type_register_match (i.types[2],
9018                                                             operand_types[2],
9019                                                             i.types[3],
9020                                                             operand_types[3])))
9021                     {
9022                       specific_error = progress (i.error);
9023                       continue;
9024                     }
9025                   /* Fall through.  */
9026                 case 3:
9027                   overlap2 = operand_type_and (i.types[2], operand_types[2]);
9028                   if (!operand_type_match (overlap2, i.types[2])
9029                       || ((check_register & 5) == 5
9030                           && !operand_type_register_match (i.types[0],
9031                                                             operand_types[0],
9032                                                             i.types[2],
9033                                                             operand_types[2]))
9034                       || ((check_register & 6) == 6
9035                           && !operand_type_register_match (i.types[1],
9036                                                             operand_types[1],
9037                                                             i.types[2],
9038                                                             operand_types[2])))
9039                     {
9040                       specific_error = progress (i.error);
9041                       continue;
9042                     }
9043                   break;
9044                 }
9045             }
9046           /* Found either forward/reverse 2, 3 or 4 operand match here:
9047              slip through to break.  */
9048         }
9049
9050       /* Check if VEX/EVEX encoding requirements can be satisfied.  */
9051       if (VEX_check_encoding (t))
9052         {
9053           specific_error = progress (i.error);
9054           continue;
9055         }
9056
9057       /* Check if EGPR operands(r16-r31) are valid.  */
9058       if (check_EgprOperands (t))
9059         {
9060           specific_error = progress (i.error);
9061           continue;
9062         }
9063
9064       /* Check if vector operands are valid.  */
9065       if (check_VecOperands (t))
9066         {
9067           specific_error = progress (i.error);
9068           continue;
9069         }
9070
9071       /* Check if APX operands are valid.  */
9072       if (check_APX_operands (t))
9073         {
9074           specific_error = progress (i.error);
9075           continue;
9076         }
9077
9078       /* Check whether to use the shorter VEX encoding for certain insns where
9079          the EVEX encoding comes first in the table.  This requires the respective
9080          AVX-* feature to be explicitly enabled.
9081
9082          Most of the respective insns have just a single EVEX and a single VEX
9083          template.  The one that's presently different is generated using the
9084          Vxy / Exy constructs: There are 3 suffix-less EVEX forms, the latter
9085          two of which may fall back to their two corresponding VEX forms.  */
9086       j = t->mnem_off != MN_vcvtneps2bf16 ? 1 : 2;
9087       if ((t == current_templates.start || j > 1)
9088           && t->opcode_modifier.disp8memshift
9089           && !t->opcode_modifier.vex
9090           && !need_evex_encoding (t)
9091           && t + j < current_templates.end
9092           && t[j].opcode_modifier.vex)
9093         {
9094           i386_cpu_flags cpu;
9095           unsigned int memshift = i.memshift;
9096
9097           i.memshift = 0;
9098           cpu = cpu_flags_and (cpu_flags_from_attr (t[j].cpu),
9099                                cpu_arch_isa_flags);
9100           if (!cpu_flags_all_zero (&cpu)
9101               && (!i.types[0].bitfield.disp8
9102                   || !operand_type_check (i.types[0], disp)
9103                   || i.op[0].disps->X_op != O_constant
9104                   || fits_in_disp8 (i.op[0].disps->X_add_number)))
9105             {
9106               specific_error = progress (internal_error);
9107               t += j - 1;
9108               continue;
9109             }
9110           i.memshift = memshift;
9111         }
9112
9113       /* If we can optimize a NDD insn to legacy insn, like
9114          add %r16, %r8, %r8 -> add %r16, %r8,
9115          add  %r8, %r16, %r8 -> add %r16, %r8, then rematch template.
9116          Note that the semantics have not been changed.  */
9117       if (optimize
9118           && !i.no_optimize
9119           && i.encoding != encoding_evex
9120           && ((t + 1 < current_templates.end
9121                && !t[1].opcode_modifier.evex
9122                && t[1].opcode_space <= SPACE_0F38
9123                && t->opcode_modifier.vexvvvv == VexVVVV_DST)
9124               || t->mnem_off == MN_movbe)
9125           && (i.types[i.operands - 1].bitfield.dword
9126               || i.types[i.operands - 1].bitfield.qword))
9127         {
9128           unsigned int match_dest_op = can_convert_NDD_to_legacy (t);
9129
9130           if (match_dest_op != (unsigned int) ~0)
9131             {
9132               size_match = true;
9133               /* We ensure that the next template has the same input
9134                  operands as the original matching template by the first
9135                  opernd (ATT). To avoid someone support new NDD insns and
9136                  put it in the wrong position.  */
9137               overlap0 = operand_type_and (i.types[0],
9138                                            t[1].operand_types[0]);
9139               if (t->opcode_modifier.d)
9140                 overlap1 = operand_type_and (i.types[0],
9141                                              t[1].operand_types[1]);
9142               if (!operand_type_match (overlap0, i.types[0])
9143                   && (!t->opcode_modifier.d
9144                       || !operand_type_match (overlap1, i.types[0])))
9145                 size_match = false;
9146
9147               if (size_match
9148                   && (t[1].opcode_space <= SPACE_0F
9149                       /* Some non-legacy-map0/1 insns can be shorter when
9150                          legacy-encoded and when no REX prefix is required.  */
9151                       || (!check_EgprOperands (t + 1)
9152                           && !check_Rex_required ()
9153                           && !i.op[i.operands - 1].regs->reg_type.bitfield.qword)))
9154                 {
9155                   if (i.operands > 2 && match_dest_op == i.operands - 3)
9156                     swap_2_operands (match_dest_op, i.operands - 2);
9157
9158                   --i.operands;
9159                   --i.reg_operands;
9160
9161                   if (t->mnem_off == MN_movbe)
9162                     {
9163                       gas_assert (t[1].mnem_off == MN_bswap);
9164                       ++current_templates.end;
9165                     }
9166
9167                   specific_error = progress (internal_error);
9168                   continue;
9169                 }
9170
9171             }
9172         }
9173
9174       /* We've found a match; break out of loop.  */
9175       break;
9176     }
9177
9178 #undef progress
9179
9180   if (t == current_templates.end)
9181     {
9182       /* We found no match.  */
9183       i.error = specific_error;
9184       return NULL;
9185     }
9186
9187   if (!quiet_warnings)
9188     {
9189       if (!intel_syntax
9190           && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
9191         as_warn (_("indirect %s without `*'"), insn_name (t));
9192
9193       if (t->opcode_modifier.isprefix
9194           && t->opcode_modifier.mnemonicsize == IGNORESIZE)
9195         {
9196           /* Warn them that a data or address size prefix doesn't
9197              affect assembly of the next line of code.  */
9198           as_warn (_("stand-alone `%s' prefix"), insn_name (t));
9199         }
9200     }
9201
9202   /* Copy the template we found.  */
9203   install_template (t);
9204
9205   if (addr_prefix_disp != -1)
9206     i.tm.operand_types[addr_prefix_disp]
9207       = operand_types[addr_prefix_disp];
9208
9209   /* APX insns acting on byte operands are WIG, yet that can't be expressed
9210      in the templates (they're also covering word/dword/qword operands).  */
9211   if (t->opcode_space == SPACE_EVEXMAP4 && !t->opcode_modifier.vexw &&
9212       i.types[i.operands - 1].bitfield.byte)
9213     {
9214       gas_assert (t->opcode_modifier.w);
9215       i.tm.opcode_modifier.vexw = VEXWIG;
9216     }
9217
9218   switch (found_reverse_match)
9219     {
9220     case 0:
9221       break;
9222
9223     case Opcode_FloatR:
9224     case Opcode_FloatR | Opcode_FloatD:
9225       i.tm.extension_opcode ^= Opcode_FloatR >> 3;
9226       found_reverse_match &= Opcode_FloatD;
9227
9228       /* Fall through.  */
9229     default:
9230       /* If we found a reverse match we must alter the opcode direction
9231          bit and clear/flip the regmem modifier one.  found_reverse_match
9232          holds bits to change (different for int & float insns).  */
9233
9234       i.tm.base_opcode ^= found_reverse_match;
9235
9236       if (i.tm.opcode_space == SPACE_EVEXMAP4)
9237         goto swap_first_2;
9238
9239       /* Certain SIMD insns have their load forms specified in the opcode
9240          table, and hence we need to _set_ RegMem instead of clearing it.
9241          We need to avoid setting the bit though on insns like KMOVW.  */
9242       i.tm.opcode_modifier.regmem
9243         = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
9244           && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
9245           && !i.tm.opcode_modifier.regmem;
9246
9247       /* Fall through.  */
9248     case ~0:
9249       i.tm.operand_types[0] = operand_types[i.operands - 1];
9250       i.tm.operand_types[i.operands - 1] = operand_types[0];
9251       break;
9252
9253     case Opcode_VexW:
9254       /* Only the first two register operands need reversing, alongside
9255          flipping VEX.W.  */
9256       i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
9257
9258     swap_first_2:
9259       j = i.tm.operand_types[0].bitfield.imm8;
9260       i.tm.operand_types[j] = operand_types[j + 1];
9261       i.tm.operand_types[j + 1] = operand_types[j];
9262       break;
9263     }
9264
9265   return t;
9266 }
9267
9268 static int
9269 check_string (void)
9270 {
9271   unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
9272   unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
9273
9274   if (i.seg[op] != NULL && i.seg[op] != reg_es)
9275     {
9276       as_bad (_("`%s' operand %u must use `%ses' segment"),
9277               insn_name (&i.tm),
9278               intel_syntax ? i.tm.operands - es_op : es_op + 1,
9279               register_prefix);
9280       return 0;
9281     }
9282
9283   /* There's only ever one segment override allowed per instruction.
9284      This instruction possibly has a legal segment override on the
9285      second operand, so copy the segment to where non-string
9286      instructions store it, allowing common code.  */
9287   i.seg[op] = i.seg[1];
9288
9289   return 1;
9290 }
9291
9292 static int
9293 process_suffix (void)
9294 {
9295   bool is_movx = false;
9296
9297   /* If matched instruction specifies an explicit instruction mnemonic
9298      suffix, use it.  */
9299   if (i.tm.opcode_modifier.size == SIZE16)
9300     i.suffix = WORD_MNEM_SUFFIX;
9301   else if (i.tm.opcode_modifier.size == SIZE32)
9302     i.suffix = LONG_MNEM_SUFFIX;
9303   else if (i.tm.opcode_modifier.size == SIZE64)
9304     i.suffix = QWORD_MNEM_SUFFIX;
9305   else if (i.reg_operands
9306            && (i.operands > 1 || i.types[0].bitfield.class == Reg)
9307            && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
9308     {
9309       unsigned int numop = i.operands;
9310
9311       /* MOVSX/MOVZX */
9312       is_movx = (i.tm.opcode_space == SPACE_0F
9313                  && (i.tm.base_opcode | 8) == 0xbe)
9314                 || (i.tm.opcode_space == SPACE_BASE
9315                     && i.tm.base_opcode == 0x63
9316                     && is_cpu (&i.tm, Cpu64));
9317
9318       /* movsx/movzx want only their source operand considered here, for the
9319          ambiguity checking below.  The suffix will be replaced afterwards
9320          to represent the destination (register).  */
9321       if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
9322         --i.operands;
9323
9324       /* crc32 needs REX.W set regardless of suffix / source operand size.  */
9325       if (i.tm.mnem_off == MN_crc32 && i.tm.operand_types[1].bitfield.qword)
9326         i.rex |= REX_W;
9327
9328       /* If there's no instruction mnemonic suffix we try to invent one
9329          based on GPR operands.  */
9330       if (!i.suffix)
9331         {
9332           /* We take i.suffix from the last register operand specified,
9333              Destination register type is more significant than source
9334              register type.  crc32 in SSE4.2 prefers source register
9335              type. */
9336           unsigned int op = i.tm.mnem_off == MN_crc32 ? 1 : i.operands;
9337
9338           while (op--)
9339             if (i.tm.operand_types[op].bitfield.instance == InstanceNone
9340                 || i.tm.operand_types[op].bitfield.instance == Accum)
9341               {
9342                 if (i.types[op].bitfield.class != Reg)
9343                   continue;
9344                 if (i.types[op].bitfield.byte)
9345                   i.suffix = BYTE_MNEM_SUFFIX;
9346                 else if (i.types[op].bitfield.word)
9347                   i.suffix = WORD_MNEM_SUFFIX;
9348                 else if (i.types[op].bitfield.dword)
9349                   i.suffix = LONG_MNEM_SUFFIX;
9350                 else if (i.types[op].bitfield.qword)
9351                   i.suffix = QWORD_MNEM_SUFFIX;
9352                 else
9353                   continue;
9354                 break;
9355               }
9356
9357           /* As an exception, movsx/movzx silently default to a byte source
9358              in AT&T mode.  */
9359           if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
9360             i.suffix = BYTE_MNEM_SUFFIX;
9361         }
9362       else if (i.suffix == BYTE_MNEM_SUFFIX)
9363         {
9364           if (!check_byte_reg ())
9365             return 0;
9366         }
9367       else if (i.suffix == LONG_MNEM_SUFFIX)
9368         {
9369           if (!check_long_reg ())
9370             return 0;
9371         }
9372       else if (i.suffix == QWORD_MNEM_SUFFIX)
9373         {
9374           if (!check_qword_reg ())
9375             return 0;
9376         }
9377       else if (i.suffix == WORD_MNEM_SUFFIX)
9378         {
9379           if (!check_word_reg ())
9380             return 0;
9381         }
9382       else if (intel_syntax
9383                && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
9384         /* Do nothing if the instruction is going to ignore the prefix.  */
9385         ;
9386       else
9387         abort ();
9388
9389       /* Undo the movsx/movzx change done above.  */
9390       i.operands = numop;
9391     }
9392   else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
9393            && !i.suffix)
9394     {
9395       i.suffix = stackop_size;
9396       if (stackop_size == LONG_MNEM_SUFFIX)
9397         {
9398           /* stackop_size is set to LONG_MNEM_SUFFIX for the
9399              .code16gcc directive to support 16-bit mode with
9400              32-bit address.  For IRET without a suffix, generate
9401              16-bit IRET (opcode 0xcf) to return from an interrupt
9402              handler.  */
9403           if (i.tm.base_opcode == 0xcf)
9404             {
9405               i.suffix = WORD_MNEM_SUFFIX;
9406               as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
9407             }
9408           /* Warn about changed behavior for segment register push/pop.  */
9409           else if ((i.tm.base_opcode | 1) == 0x07)
9410             as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
9411                      insn_name (&i.tm));
9412         }
9413     }
9414   else if (!i.suffix
9415            && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
9416                || i.tm.opcode_modifier.jump == JUMP_BYTE
9417                || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
9418                || (i.tm.opcode_space == SPACE_0F
9419                    && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
9420                    && i.tm.extension_opcode <= 3)))
9421     {
9422       switch (flag_code)
9423         {
9424         case CODE_64BIT:
9425           if (!i.tm.opcode_modifier.no_qsuf)
9426             {
9427               if (i.tm.opcode_modifier.jump == JUMP_BYTE
9428                   || i.tm.opcode_modifier.no_lsuf)
9429                 i.suffix = QWORD_MNEM_SUFFIX;
9430               break;
9431             }
9432           /* Fall through.  */
9433         case CODE_32BIT:
9434           if (!i.tm.opcode_modifier.no_lsuf)
9435             i.suffix = LONG_MNEM_SUFFIX;
9436           break;
9437         case CODE_16BIT:
9438           if (!i.tm.opcode_modifier.no_wsuf)
9439             i.suffix = WORD_MNEM_SUFFIX;
9440           break;
9441         }
9442     }
9443
9444   if (!i.suffix
9445       && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
9446           /* Also cover lret/retf/iret in 64-bit mode.  */
9447           || (flag_code == CODE_64BIT
9448               && !i.tm.opcode_modifier.no_lsuf
9449               && !i.tm.opcode_modifier.no_qsuf))
9450       && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
9451       /* Explicit sizing prefixes are assumed to disambiguate insns.  */
9452       && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
9453       /* Accept FLDENV et al without suffix.  */
9454       && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
9455     {
9456       unsigned int suffixes, evex = 0;
9457
9458       suffixes = !i.tm.opcode_modifier.no_bsuf;
9459       if (!i.tm.opcode_modifier.no_wsuf)
9460         suffixes |= 1 << 1;
9461       if (!i.tm.opcode_modifier.no_lsuf)
9462         suffixes |= 1 << 2;
9463       if (!i.tm.opcode_modifier.no_ssuf)
9464         suffixes |= 1 << 4;
9465       if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
9466         suffixes |= 1 << 5;
9467
9468       /* For [XYZ]MMWORD operands inspect operand sizes.  While generally
9469          also suitable for AT&T syntax mode, it was requested that this be
9470          restricted to just Intel syntax.  */
9471       if (intel_syntax && is_any_vex_encoding (&i.tm)
9472           && !i.broadcast.type && !i.broadcast.bytes)
9473         {
9474           unsigned int op;
9475
9476           for (op = 0; op < i.tm.operands; ++op)
9477             {
9478               if (vector_size < VSZ512)
9479                 {
9480                   i.tm.operand_types[op].bitfield.zmmword = 0;
9481                   if (vector_size < VSZ256)
9482                     {
9483                       i.tm.operand_types[op].bitfield.ymmword = 0;
9484                       if (i.tm.operand_types[op].bitfield.xmmword
9485                           && i.tm.opcode_modifier.evex == EVEXDYN)
9486                         i.tm.opcode_modifier.evex = EVEX128;
9487                     }
9488                   else if (i.tm.operand_types[op].bitfield.ymmword
9489                            && !i.tm.operand_types[op].bitfield.xmmword
9490                            && i.tm.opcode_modifier.evex == EVEXDYN)
9491                     i.tm.opcode_modifier.evex = EVEX256;
9492                 }
9493               else if (i.tm.opcode_modifier.evex
9494                        && !cpu_arch_flags.bitfield.cpuavx512vl)
9495                 {
9496                   if (i.tm.operand_types[op].bitfield.ymmword)
9497                     i.tm.operand_types[op].bitfield.xmmword = 0;
9498                   if (i.tm.operand_types[op].bitfield.zmmword)
9499                     i.tm.operand_types[op].bitfield.ymmword = 0;
9500                   if (i.tm.opcode_modifier.evex == EVEXDYN)
9501                     i.tm.opcode_modifier.evex = EVEX512;
9502                 }
9503
9504               if (i.tm.operand_types[op].bitfield.xmmword
9505                   + i.tm.operand_types[op].bitfield.ymmword
9506                   + i.tm.operand_types[op].bitfield.zmmword < 2)
9507                 continue;
9508
9509               /* Any properly sized operand disambiguates the insn.  */
9510               if (i.types[op].bitfield.xmmword
9511                   || i.types[op].bitfield.ymmword
9512                   || i.types[op].bitfield.zmmword)
9513                 {
9514                   suffixes &= ~(7 << 6);
9515                   evex = 0;
9516                   break;
9517                 }
9518
9519               if ((i.flags[op] & Operand_Mem)
9520                   && i.tm.operand_types[op].bitfield.unspecified)
9521                 {
9522                   if (i.tm.operand_types[op].bitfield.xmmword)
9523                     suffixes |= 1 << 6;
9524                   if (i.tm.operand_types[op].bitfield.ymmword)
9525                     suffixes |= 1 << 7;
9526                   if (i.tm.operand_types[op].bitfield.zmmword)
9527                     suffixes |= 1 << 8;
9528                   if (i.tm.opcode_modifier.evex)
9529                     evex = EVEX512;
9530                 }
9531             }
9532         }
9533
9534       /* Are multiple suffixes / operand sizes allowed?  */
9535       if (suffixes & (suffixes - 1))
9536         {
9537           if (intel_syntax
9538               && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
9539                   || operand_check == check_error))
9540             {
9541               as_bad (_("ambiguous operand size for `%s'"), insn_name (&i.tm));
9542               return 0;
9543             }
9544           if (operand_check == check_error)
9545             {
9546               as_bad (_("no instruction mnemonic suffix given and "
9547                         "no register operands; can't size `%s'"), insn_name (&i.tm));
9548               return 0;
9549             }
9550           if (operand_check == check_warning)
9551             as_warn (_("%s; using default for `%s'"),
9552                        intel_syntax
9553                        ? _("ambiguous operand size")
9554                        : _("no instruction mnemonic suffix given and "
9555                            "no register operands"),
9556                        insn_name (&i.tm));
9557
9558           if (i.tm.opcode_modifier.floatmf)
9559             i.suffix = SHORT_MNEM_SUFFIX;
9560           else if (is_movx)
9561             /* handled below */;
9562           else if (evex)
9563             i.tm.opcode_modifier.evex = evex;
9564           else if (flag_code == CODE_16BIT)
9565             i.suffix = WORD_MNEM_SUFFIX;
9566           else if (!i.tm.opcode_modifier.no_lsuf)
9567             i.suffix = LONG_MNEM_SUFFIX;
9568           else
9569             i.suffix = QWORD_MNEM_SUFFIX;
9570         }
9571     }
9572
9573   if (is_movx)
9574     {
9575       /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
9576          In AT&T syntax, if there is no suffix (warned about above), the default
9577          will be byte extension.  */
9578       if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
9579         i.tm.base_opcode |= 1;
9580
9581       /* For further processing, the suffix should represent the destination
9582          (register).  This is already the case when one was used with
9583          mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
9584          no suffix to begin with.  */
9585       if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
9586         {
9587           if (i.types[1].bitfield.word)
9588             i.suffix = WORD_MNEM_SUFFIX;
9589           else if (i.types[1].bitfield.qword)
9590             i.suffix = QWORD_MNEM_SUFFIX;
9591           else
9592             i.suffix = LONG_MNEM_SUFFIX;
9593
9594           i.tm.opcode_modifier.w = 0;
9595         }
9596     }
9597
9598   if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
9599     i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
9600                    != (i.tm.operand_types[1].bitfield.class == Reg);
9601
9602   /* Change the opcode based on the operand size given by i.suffix.  */
9603   switch (i.suffix)
9604     {
9605     /* Size floating point instruction.  */
9606     case LONG_MNEM_SUFFIX:
9607       if (i.tm.opcode_modifier.floatmf)
9608         {
9609           i.tm.base_opcode ^= 4;
9610           break;
9611         }
9612     /* fall through */
9613     case WORD_MNEM_SUFFIX:
9614     case QWORD_MNEM_SUFFIX:
9615       /* It's not a byte, select word/dword operation.  */
9616       if (i.tm.opcode_modifier.w)
9617         {
9618           if (i.short_form)
9619             i.tm.base_opcode |= 8;
9620           else
9621             i.tm.base_opcode |= 1;
9622         }
9623
9624       /* Set mode64 for an operand.  */
9625       if (i.suffix == QWORD_MNEM_SUFFIX)
9626         {
9627           if (flag_code == CODE_64BIT
9628               && !i.tm.opcode_modifier.norex64
9629               && !i.tm.opcode_modifier.vexw
9630               /* Special case for xchg %rax,%rax.  It is NOP and doesn't
9631                  need rex64. */
9632               && ! (i.operands == 2
9633                     && i.tm.base_opcode == 0x90
9634                     && i.tm.opcode_space == SPACE_BASE
9635                     && i.types[0].bitfield.instance == Accum
9636                     && i.types[1].bitfield.instance == Accum))
9637             i.rex |= REX_W;
9638
9639           break;
9640         }
9641
9642     /* fall through */
9643     case SHORT_MNEM_SUFFIX:
9644       /* Now select between word & dword operations via the operand
9645          size prefix, except for instructions that will ignore this
9646          prefix anyway.  */
9647       if (i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
9648           && !i.tm.opcode_modifier.floatmf
9649           && (!is_any_vex_encoding (&i.tm)
9650               || i.tm.opcode_space == SPACE_EVEXMAP4)
9651           && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
9652               || (flag_code == CODE_64BIT
9653                   && i.tm.opcode_modifier.jump == JUMP_BYTE)))
9654         {
9655           unsigned int prefix = DATA_PREFIX_OPCODE;
9656
9657           if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
9658             prefix = ADDR_PREFIX_OPCODE;
9659
9660           /* The DATA PREFIX of EVEX promoted from legacy APX instructions
9661              needs to be adjusted.  */
9662           if (i.tm.opcode_space == SPACE_EVEXMAP4)
9663             {
9664               gas_assert (!i.tm.opcode_modifier.opcodeprefix);
9665               i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
9666             }
9667           else if (!add_prefix (prefix))
9668             return 0;
9669         }
9670
9671       break;
9672
9673     case 0:
9674       /* Select word/dword/qword operation with explicit data sizing prefix
9675          when there are no suitable register operands.  */
9676       if (i.tm.opcode_modifier.w
9677           && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
9678           && (!i.reg_operands
9679               || (i.reg_operands == 1
9680                       /* ShiftCount */
9681                   && (i.tm.operand_types[0].bitfield.instance == RegC
9682                       /* InOutPortReg */
9683                       || i.tm.operand_types[0].bitfield.instance == RegD
9684                       || i.tm.operand_types[1].bitfield.instance == RegD
9685                       || i.tm.mnem_off == MN_crc32))))
9686         i.tm.base_opcode |= 1;
9687       break;
9688     }
9689
9690   if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
9691     {
9692       gas_assert (!i.suffix);
9693       gas_assert (i.reg_operands);
9694
9695       if (i.tm.operand_types[0].bitfield.instance == Accum
9696           || i.operands == 1)
9697         {
9698           /* The address size override prefix changes the size of the
9699              first operand.  */
9700           if (flag_code == CODE_64BIT
9701               && i.op[0].regs->reg_type.bitfield.word)
9702             {
9703               as_bad (_("16-bit addressing unavailable for `%s'"),
9704                       insn_name (&i.tm));
9705               return 0;
9706             }
9707
9708           if ((flag_code == CODE_32BIT
9709                ? i.op[0].regs->reg_type.bitfield.word
9710                : i.op[0].regs->reg_type.bitfield.dword)
9711               && !add_prefix (ADDR_PREFIX_OPCODE))
9712             return 0;
9713         }
9714       else
9715         {
9716           /* Check invalid register operand when the address size override
9717              prefix changes the size of register operands.  */
9718           unsigned int op;
9719           enum { need_word, need_dword, need_qword } need;
9720
9721           /* Check the register operand for the address size prefix if
9722              the memory operand has no real registers, like symbol, DISP
9723              or bogus (x32-only) symbol(%rip) when symbol(%eip) is meant.  */
9724           if (i.mem_operands == 1
9725               && i.reg_operands == 1
9726               && i.operands == 2
9727               && i.types[1].bitfield.class == Reg
9728               && (flag_code == CODE_32BIT
9729                   ? i.op[1].regs->reg_type.bitfield.word
9730                   : i.op[1].regs->reg_type.bitfield.dword)
9731               && ((i.base_reg == NULL && i.index_reg == NULL)
9732 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9733                   || (x86_elf_abi == X86_64_X32_ABI
9734                       && i.base_reg
9735                       && i.base_reg->reg_num == RegIP
9736                       && i.base_reg->reg_type.bitfield.qword))
9737 #else
9738                   || 0)
9739 #endif
9740               && !add_prefix (ADDR_PREFIX_OPCODE))
9741             return 0;
9742
9743           if (flag_code == CODE_32BIT)
9744             need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
9745           else if (i.prefix[ADDR_PREFIX])
9746             need = need_dword;
9747           else
9748             need = flag_code == CODE_64BIT ? need_qword : need_word;
9749
9750           for (op = 0; op < i.operands; op++)
9751             {
9752               if (i.types[op].bitfield.class != Reg)
9753                 continue;
9754
9755               switch (need)
9756                 {
9757                 case need_word:
9758                   if (i.op[op].regs->reg_type.bitfield.word)
9759                     continue;
9760                   break;
9761                 case need_dword:
9762                   if (i.op[op].regs->reg_type.bitfield.dword)
9763                     continue;
9764                   break;
9765                 case need_qword:
9766                   if (i.op[op].regs->reg_type.bitfield.qword)
9767                     continue;
9768                   break;
9769                 }
9770
9771               as_bad (_("invalid register operand size for `%s'"),
9772                       insn_name (&i.tm));
9773               return 0;
9774             }
9775         }
9776     }
9777
9778   return 1;
9779 }
9780
9781 static int
9782 check_byte_reg (void)
9783 {
9784   int op;
9785
9786   for (op = i.operands; --op >= 0;)
9787     {
9788       /* Skip non-register operands. */
9789       if (i.types[op].bitfield.class != Reg)
9790         continue;
9791
9792       /* If this is an eight bit register, it's OK.  If it's the 16 or
9793          32 bit version of an eight bit register, we will just use the
9794          low portion, and that's OK too.  */
9795       if (i.types[op].bitfield.byte)
9796         continue;
9797
9798       /* I/O port address operands are OK too.  */
9799       if (i.tm.operand_types[op].bitfield.instance == RegD
9800           && i.tm.operand_types[op].bitfield.word)
9801         continue;
9802
9803       /* crc32 only wants its source operand checked here.  */
9804       if (i.tm.mnem_off == MN_crc32 && op != 0)
9805         continue;
9806
9807       /* Any other register is bad.  */
9808       as_bad (_("`%s%s' not allowed with `%s%c'"),
9809               register_prefix, i.op[op].regs->reg_name,
9810               insn_name (&i.tm), i.suffix);
9811       return 0;
9812     }
9813   return 1;
9814 }
9815
9816 static int
9817 check_long_reg (void)
9818 {
9819   int op;
9820
9821   for (op = i.operands; --op >= 0;)
9822     /* Skip non-register operands. */
9823     if (i.types[op].bitfield.class != Reg)
9824       continue;
9825     /* Reject eight bit registers, except where the template requires
9826        them. (eg. movzb)  */
9827     else if (i.types[op].bitfield.byte
9828              && (i.tm.operand_types[op].bitfield.class == Reg
9829                  || i.tm.operand_types[op].bitfield.instance == Accum)
9830              && (i.tm.operand_types[op].bitfield.word
9831                  || i.tm.operand_types[op].bitfield.dword))
9832       {
9833         as_bad (_("`%s%s' not allowed with `%s%c'"),
9834                 register_prefix,
9835                 i.op[op].regs->reg_name,
9836                 insn_name (&i.tm),
9837                 i.suffix);
9838         return 0;
9839       }
9840     /* Error if the e prefix on a general reg is missing, or if the r
9841        prefix on a general reg is present.  */
9842     else if ((i.types[op].bitfield.word
9843               || i.types[op].bitfield.qword)
9844              && (i.tm.operand_types[op].bitfield.class == Reg
9845                  || i.tm.operand_types[op].bitfield.instance == Accum)
9846              && i.tm.operand_types[op].bitfield.dword)
9847       {
9848         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
9849                 register_prefix, i.op[op].regs->reg_name,
9850                 i.suffix);
9851         return 0;
9852       }
9853   return 1;
9854 }
9855
9856 static int
9857 check_qword_reg (void)
9858 {
9859   int op;
9860
9861   for (op = i.operands; --op >= 0; )
9862     /* Skip non-register operands. */
9863     if (i.types[op].bitfield.class != Reg)
9864       continue;
9865     /* Reject eight bit registers, except where the template requires
9866        them. (eg. movzb)  */
9867     else if (i.types[op].bitfield.byte
9868              && (i.tm.operand_types[op].bitfield.class == Reg
9869                  || i.tm.operand_types[op].bitfield.instance == Accum)
9870              && (i.tm.operand_types[op].bitfield.word
9871                  || i.tm.operand_types[op].bitfield.dword
9872                  || i.tm.operand_types[op].bitfield.qword))
9873       {
9874         as_bad (_("`%s%s' not allowed with `%s%c'"),
9875                 register_prefix,
9876                 i.op[op].regs->reg_name,
9877                 insn_name (&i.tm),
9878                 i.suffix);
9879         return 0;
9880       }
9881     /* Error if the r prefix on a general reg is missing.  */
9882     else if ((i.types[op].bitfield.word
9883               || i.types[op].bitfield.dword)
9884              && (i.tm.operand_types[op].bitfield.class == Reg
9885                  || i.tm.operand_types[op].bitfield.instance == Accum)
9886              && i.tm.operand_types[op].bitfield.qword)
9887       {
9888         /* Prohibit these changes in the 64bit mode, since the
9889            lowering is more complicated.  */
9890         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
9891                 register_prefix, i.op[op].regs->reg_name, i.suffix);
9892         return 0;
9893       }
9894   return 1;
9895 }
9896
9897 static int
9898 check_word_reg (void)
9899 {
9900   int op;
9901   for (op = i.operands; --op >= 0;)
9902     /* Skip non-register operands. */
9903     if (i.types[op].bitfield.class != Reg)
9904       continue;
9905     /* Reject eight bit registers, except where the template requires
9906        them. (eg. movzb)  */
9907     else if (i.types[op].bitfield.byte
9908              && (i.tm.operand_types[op].bitfield.class == Reg
9909                  || i.tm.operand_types[op].bitfield.instance == Accum)
9910              && (i.tm.operand_types[op].bitfield.word
9911                  || i.tm.operand_types[op].bitfield.dword))
9912       {
9913         as_bad (_("`%s%s' not allowed with `%s%c'"),
9914                 register_prefix,
9915                 i.op[op].regs->reg_name,
9916                 insn_name (&i.tm),
9917                 i.suffix);
9918         return 0;
9919       }
9920     /* Error if the e or r prefix on a general reg is present.  */
9921     else if ((i.types[op].bitfield.dword
9922                  || i.types[op].bitfield.qword)
9923              && (i.tm.operand_types[op].bitfield.class == Reg
9924                  || i.tm.operand_types[op].bitfield.instance == Accum)
9925              && i.tm.operand_types[op].bitfield.word)
9926       {
9927         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
9928                 register_prefix, i.op[op].regs->reg_name,
9929                 i.suffix);
9930         return 0;
9931       }
9932   return 1;
9933 }
9934
9935 static int
9936 update_imm (unsigned int j)
9937 {
9938   i386_operand_type overlap = i.types[j];
9939
9940   if (i.tm.operand_types[j].bitfield.imm8
9941       && i.tm.operand_types[j].bitfield.imm8s
9942       && overlap.bitfield.imm8 && overlap.bitfield.imm8s)
9943     {
9944       /* This combination is used on 8-bit immediates where e.g. $~0 is
9945          desirable to permit.  We're past operand type matching, so simply
9946          put things back in the shape they were before introducing the
9947          distinction between Imm8, Imm8S, and Imm8|Imm8S.  */
9948       overlap.bitfield.imm8s = 0;
9949     }
9950
9951   if (overlap.bitfield.imm8
9952       + overlap.bitfield.imm8s
9953       + overlap.bitfield.imm16
9954       + overlap.bitfield.imm32
9955       + overlap.bitfield.imm32s
9956       + overlap.bitfield.imm64 > 1)
9957     {
9958       static const i386_operand_type imm16 = { .bitfield = { .imm16 = 1 } };
9959       static const i386_operand_type imm32 = { .bitfield = { .imm32 = 1 } };
9960       static const i386_operand_type imm32s = { .bitfield = { .imm32s = 1 } };
9961       static const i386_operand_type imm16_32 = { .bitfield =
9962         { .imm16 = 1, .imm32 = 1 }
9963       };
9964       static const i386_operand_type imm16_32s =  { .bitfield =
9965         { .imm16 = 1, .imm32s = 1 }
9966       };
9967       static const i386_operand_type imm16_32_32s = { .bitfield =
9968         { .imm16 = 1, .imm32 = 1, .imm32s = 1 }
9969       };
9970
9971       if (i.suffix)
9972         {
9973           i386_operand_type temp;
9974
9975           operand_type_set (&temp, 0);
9976           if (i.suffix == BYTE_MNEM_SUFFIX)
9977             {
9978               temp.bitfield.imm8 = overlap.bitfield.imm8;
9979               temp.bitfield.imm8s = overlap.bitfield.imm8s;
9980             }
9981           else if (i.suffix == WORD_MNEM_SUFFIX)
9982             temp.bitfield.imm16 = overlap.bitfield.imm16;
9983           else if (i.suffix == QWORD_MNEM_SUFFIX)
9984             {
9985               temp.bitfield.imm64 = overlap.bitfield.imm64;
9986               temp.bitfield.imm32s = overlap.bitfield.imm32s;
9987             }
9988           else
9989             temp.bitfield.imm32 = overlap.bitfield.imm32;
9990           overlap = temp;
9991         }
9992       else if (operand_type_equal (&overlap, &imm16_32_32s)
9993                || operand_type_equal (&overlap, &imm16_32)
9994                || operand_type_equal (&overlap, &imm16_32s))
9995         {
9996           if ((flag_code == CODE_16BIT)
9997               ^ (i.prefix[DATA_PREFIX] != 0 && !(i.prefix[REX_PREFIX] & REX_W)))
9998             overlap = imm16;
9999           else
10000             overlap = imm32s;
10001         }
10002       else if (i.prefix[REX_PREFIX] & REX_W)
10003         overlap = operand_type_and (overlap, imm32s);
10004       else if (i.prefix[DATA_PREFIX])
10005         overlap = operand_type_and (overlap,
10006                                     flag_code != CODE_16BIT ? imm16 : imm32);
10007       if (overlap.bitfield.imm8
10008           + overlap.bitfield.imm8s
10009           + overlap.bitfield.imm16
10010           + overlap.bitfield.imm32
10011           + overlap.bitfield.imm32s
10012           + overlap.bitfield.imm64 != 1)
10013         {
10014           as_bad (_("no instruction mnemonic suffix given; "
10015                     "can't determine immediate size"));
10016           return 0;
10017         }
10018     }
10019   i.types[j] = overlap;
10020
10021   return 1;
10022 }
10023
10024 static int
10025 finalize_imm (void)
10026 {
10027   unsigned int j, n;
10028
10029   /* Update the first 2 immediate operands.  */
10030   n = i.operands > 2 ? 2 : i.operands;
10031   if (n)
10032     {
10033       for (j = 0; j < n; j++)
10034         if (update_imm (j) == 0)
10035           return 0;
10036
10037       /* The 3rd operand can't be immediate operand.  */
10038       gas_assert (operand_type_check (i.types[2], imm) == 0);
10039     }
10040
10041   return 1;
10042 }
10043
10044 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
10045                                  bool do_sse2avx)
10046 {
10047   if (r->reg_flags & RegRex)
10048     {
10049       if (i.rex & rex_bit)
10050         as_bad (_("same type of prefix used twice"));
10051       i.rex |= rex_bit;
10052     }
10053   else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
10054     {
10055       gas_assert (i.vex.register_specifier == r);
10056       i.vex.register_specifier += 8;
10057     }
10058
10059   if (r->reg_flags & RegVRex)
10060     i.vrex |= rex_bit;
10061
10062   if (r->reg_flags & RegRex2)
10063     i.rex2 |= rex_bit;
10064 }
10065
10066 static INLINE void
10067 set_rex_rex2 (const reg_entry *r, unsigned int rex_bit)
10068 {
10069   if ((r->reg_flags & RegRex) != 0)
10070     i.rex |= rex_bit;
10071   if ((r->reg_flags & RegRex2) != 0)
10072     i.rex2 |= rex_bit;
10073 }
10074
10075 static int
10076 process_operands (void)
10077 {
10078   /* Default segment register this instruction will use for memory
10079      accesses.  0 means unknown.  This is only for optimizing out
10080      unnecessary segment overrides.  */
10081   const reg_entry *default_seg = NULL;
10082
10083   for (unsigned int j = 0; j < i.operands; j++)
10084     if (i.types[j].bitfield.instance != InstanceNone)
10085       i.reg_operands--;
10086
10087   if (i.tm.opcode_modifier.sse2avx)
10088     {
10089       /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
10090          need converting.  */
10091       i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
10092       i.prefix[REX_PREFIX] = 0;
10093       i.rex_encoding = 0;
10094       i.rex2_encoding = 0;
10095     }
10096   /* ImmExt should be processed after SSE2AVX.  */
10097   else if (i.tm.opcode_modifier.immext)
10098     process_immext ();
10099
10100   /* TILEZERO is unusual in that it has a single operand encoded in ModR/M.reg,
10101      not ModR/M.rm.  To avoid special casing this in build_modrm_byte(), fake a
10102      new destination operand here, while converting the source one to register
10103      number 0.  */
10104   if (i.tm.mnem_off == MN_tilezero)
10105     {
10106       i.op[1].regs = i.op[0].regs;
10107       i.op[0].regs -= i.op[0].regs->reg_num;
10108       i.types[1] = i.types[0];
10109       i.tm.operand_types[1] = i.tm.operand_types[0];
10110       i.flags[1] = i.flags[0];
10111       i.operands++;
10112       i.reg_operands++;
10113       i.tm.operands++;
10114     }
10115
10116   if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
10117     {
10118       static const i386_operand_type regxmm = {
10119         .bitfield = { .class = RegSIMD, .xmmword = 1 }
10120       };
10121       unsigned int dupl = i.operands;
10122       unsigned int dest = dupl - 1;
10123       unsigned int j;
10124
10125       /* The destination must be an xmm register.  */
10126       gas_assert (i.reg_operands
10127                   && MAX_OPERANDS > dupl
10128                   && operand_type_equal (&i.types[dest], &regxmm));
10129
10130       if (i.tm.operand_types[0].bitfield.instance == Accum
10131           && i.tm.operand_types[0].bitfield.xmmword)
10132         {
10133           /* Keep xmm0 for instructions with VEX prefix and 3
10134              sources.  */
10135           i.tm.operand_types[0].bitfield.instance = InstanceNone;
10136           i.tm.operand_types[0].bitfield.class = RegSIMD;
10137           i.reg_operands++;
10138           goto duplicate;
10139         }
10140
10141       if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
10142         {
10143           gas_assert ((MAX_OPERANDS - 1) > dupl);
10144
10145           /* Add the implicit xmm0 for instructions with VEX prefix
10146              and 3 sources.  */
10147           for (j = i.operands; j > 0; j--)
10148             {
10149               i.op[j] = i.op[j - 1];
10150               i.types[j] = i.types[j - 1];
10151               i.tm.operand_types[j] = i.tm.operand_types[j - 1];
10152               i.flags[j] = i.flags[j - 1];
10153             }
10154           i.op[0].regs
10155             = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
10156           i.types[0] = regxmm;
10157           i.tm.operand_types[0] = regxmm;
10158
10159           i.operands += 2;
10160           i.reg_operands += 2;
10161           i.tm.operands += 2;
10162
10163           dupl++;
10164           dest++;
10165           i.op[dupl] = i.op[dest];
10166           i.types[dupl] = i.types[dest];
10167           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
10168           i.flags[dupl] = i.flags[dest];
10169         }
10170       else
10171         {
10172         duplicate:
10173           i.operands++;
10174           i.reg_operands++;
10175           i.tm.operands++;
10176
10177           i.op[dupl] = i.op[dest];
10178           i.types[dupl] = i.types[dest];
10179           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
10180           i.flags[dupl] = i.flags[dest];
10181         }
10182
10183        if (i.tm.opcode_modifier.immext)
10184          process_immext ();
10185     }
10186   else if (i.tm.operand_types[0].bitfield.instance == Accum
10187            && i.tm.opcode_modifier.modrm)
10188     {
10189       unsigned int j;
10190
10191       for (j = 1; j < i.operands; j++)
10192         {
10193           i.op[j - 1] = i.op[j];
10194           i.types[j - 1] = i.types[j];
10195
10196           /* We need to adjust fields in i.tm since they are used by
10197              build_modrm_byte.  */
10198           i.tm.operand_types [j - 1] = i.tm.operand_types [j];
10199
10200           i.flags[j - 1] = i.flags[j];
10201         }
10202
10203       /* No adjustment to i.reg_operands: This was already done at the top
10204          of the function.  */
10205       i.operands--;
10206       i.tm.operands--;
10207     }
10208   else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_QUAD_GROUP)
10209     {
10210       unsigned int regnum, first_reg_in_group, last_reg_in_group;
10211
10212       /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
10213       gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
10214       regnum = register_number (i.op[1].regs);
10215       first_reg_in_group = regnum & ~3;
10216       last_reg_in_group = first_reg_in_group + 3;
10217       if (regnum != first_reg_in_group)
10218         as_warn (_("source register `%s%s' implicitly denotes"
10219                    " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
10220                  register_prefix, i.op[1].regs->reg_name,
10221                  register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
10222                  register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
10223                  insn_name (&i.tm));
10224     }
10225   else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
10226     {
10227       /* The imul $imm, %reg instruction is converted into
10228          imul $imm, %reg, %reg, and the clr %reg instruction
10229          is converted into xor %reg, %reg.  */
10230
10231       unsigned int first_reg_op;
10232
10233       if (operand_type_check (i.types[0], reg))
10234         first_reg_op = 0;
10235       else
10236         first_reg_op = 1;
10237       /* Pretend we saw the extra register operand.  */
10238       gas_assert (i.reg_operands == 1
10239                   && i.op[first_reg_op + 1].regs == 0);
10240       i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
10241       i.types[first_reg_op + 1] = i.types[first_reg_op];
10242       i.operands++;
10243       i.reg_operands++;
10244     }
10245
10246   if (i.tm.opcode_modifier.modrm)
10247     {
10248       /* The opcode is completed (modulo i.tm.extension_opcode which
10249          must be put into the modrm byte).  Now, we make the modrm and
10250          index base bytes based on all the info we've collected.  */
10251
10252       default_seg = build_modrm_byte ();
10253
10254       if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
10255         {
10256           /* Warn about some common errors, but press on regardless.  */
10257           if (i.operands == 2)
10258             {
10259               /* Reversed arguments on faddp or fmulp.  */
10260               as_warn (_("translating to `%s %s%s,%s%s'"), insn_name (&i.tm),
10261                        register_prefix, i.op[!intel_syntax].regs->reg_name,
10262                        register_prefix, i.op[intel_syntax].regs->reg_name);
10263             }
10264           else if (i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
10265             {
10266               /* Extraneous `l' suffix on fp insn.  */
10267               as_warn (_("translating to `%s %s%s'"), insn_name (&i.tm),
10268                        register_prefix, i.op[0].regs->reg_name);
10269             }
10270         }
10271     }
10272   else if (i.types[0].bitfield.class == SReg && !dot_insn ())
10273     {
10274       if (flag_code != CODE_64BIT
10275           ? i.tm.base_opcode == POP_SEG_SHORT
10276             && i.op[0].regs->reg_num == 1
10277           : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
10278             && i.op[0].regs->reg_num < 4)
10279         {
10280           as_bad (_("you can't `%s %s%s'"),
10281                   insn_name (&i.tm), register_prefix, i.op[0].regs->reg_name);
10282           return 0;
10283         }
10284       if (i.op[0].regs->reg_num > 3
10285           && i.tm.opcode_space == SPACE_BASE )
10286         {
10287           i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
10288           i.tm.opcode_space = SPACE_0F;
10289         }
10290       i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
10291     }
10292   else if (i.tm.opcode_space == SPACE_BASE
10293            && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
10294     {
10295       default_seg = reg_ds;
10296     }
10297   else if (i.tm.opcode_modifier.isstring)
10298     {
10299       /* For the string instructions that allow a segment override
10300          on one of their operands, the default segment is ds.  */
10301       default_seg = reg_ds;
10302     }
10303   else if (i.short_form)
10304     {
10305       /* The register operand is in the 1st or 2nd non-immediate operand.  */
10306       const reg_entry *r = i.op[i.imm_operands].regs;
10307
10308       if (!dot_insn ()
10309           && r->reg_type.bitfield.instance == Accum
10310           && i.op[i.imm_operands + 1].regs)
10311         r = i.op[i.imm_operands + 1].regs;
10312       /* Register goes in low 3 bits of opcode.  */
10313       i.tm.base_opcode |= r->reg_num;
10314       set_rex_vrex (r, REX_B, false);
10315
10316       if (dot_insn () && i.reg_operands == 2)
10317         {
10318           gas_assert (is_any_vex_encoding (&i.tm)
10319                       || i.encoding != encoding_default);
10320           i.vex.register_specifier = i.op[i.operands - 1].regs;
10321         }
10322     }
10323   else if (i.reg_operands == 1
10324            && !i.flags[i.operands - 1]
10325            && i.tm.operand_types[i.operands - 1].bitfield.instance
10326               == InstanceNone)
10327     {
10328       gas_assert (is_any_vex_encoding (&i.tm)
10329                   || i.encoding != encoding_default);
10330       i.vex.register_specifier = i.op[i.operands - 1].regs;
10331     }
10332
10333   if ((i.seg[0] || i.prefix[SEG_PREFIX])
10334       && i.tm.mnem_off == MN_lea)
10335     {
10336       if (!quiet_warnings)
10337         as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
10338       if (optimize && !i.no_optimize)
10339         {
10340           i.seg[0] = NULL;
10341           i.prefix[SEG_PREFIX] = 0;
10342         }
10343     }
10344
10345   /* If a segment was explicitly specified, and the specified segment
10346      is neither the default nor the one already recorded from a prefix,
10347      use an opcode prefix to select it.  If we never figured out what
10348      the default segment is, then default_seg will be zero at this
10349      point, and the specified segment prefix will always be used.  */
10350   if (i.seg[0]
10351       && i.seg[0] != default_seg
10352       && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
10353     {
10354       if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
10355         return 0;
10356     }
10357   return 1;
10358 }
10359
10360 static const reg_entry *
10361 build_modrm_byte (void)
10362 {
        /* Purpose: fill in i.rm (plus i.sib for memory operands) for the
           insn described by the global `i', store in i.vex.register_specifier
           the operand selected by the template's VexVVVV setting (if any),
           and return the default segment implied by the addressing form:
           NULL unless a memory operand is encoded here, otherwise reg_ds, or
           reg_ss for the (%bp) and non-REX %ebp / %esp based forms.  Along
           the way an Imm8 operand (holding a register number) or a zero
           displacement may be appended to i.op[].  */
10363   const reg_entry *default_seg = NULL;
10364   unsigned int source = i.imm_operands - i.tm.opcode_modifier.immext
10365                         /* Compensate for kludge in md_assemble().  */
10366                         + i.tm.operand_types[0].bitfield.imm1;
10367   unsigned int dest = i.operands - 1 - i.tm.opcode_modifier.immext;
10368   unsigned int v, op, reg_slot;
10369
10370   /* Accumulator (in particular %st), shift count (%cl), and alike need
10371      to be skipped just like immediate operands do.  */
10372   if (i.tm.operand_types[source].bitfield.instance)
10373     ++source;
10374   while (i.tm.operand_types[dest].bitfield.instance)
10375     --dest;
10376
        /* Find the first operand, from SOURCE on, for which the template
           permits BaseIndex.  OP ends up equal to i.operands when there is
           none; that is tested by `op < i.operands' further down.  */
10377   for (op = source; op < i.operands; ++op)
10378     if (i.tm.operand_types[op].bitfield.baseindex)
10379       break;
10380
10381   if (i.reg_operands + i.mem_operands + (i.tm.extension_opcode != None) == 4)
10382     {
10383       expressionS *exp;
10384
10385       /* There are 2 kinds of instructions:
10386          1. 5 operands: 4 register operands or 3 register operands
10387          plus 1 memory operand plus one Imm4 operand, VexXDS, and
10388          VexW0 or VexW1.  The destination must be either XMM, YMM or
10389          ZMM register.
10390          2. 4 operands: 4 register operands or 3 register operands
10391          plus 1 memory operand, with VexXDS.
10392          3. Other equivalent combinations when coming from s_insn().  */
10393       gas_assert (i.tm.opcode_modifier.vexvvvv
10394                   && i.tm.opcode_modifier.vexw);
10395       gas_assert (dot_insn ()
10396                   || i.tm.operand_types[dest].bitfield.class == RegSIMD);
10397
10398       /* Of the first two non-immediate operands the one with the template
10399          not allowing for a memory one is encoded in the immediate operand.  */
10400       if (source == op)
10401         reg_slot = source + 1;
10402       else
10403         reg_slot = source++;
10404
10405       if (!dot_insn ())
10406         {
10407           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
10408           gas_assert (!(i.op[reg_slot].regs->reg_flags & RegVRex));
10409         }
10410       else
10411         gas_assert (i.tm.operand_types[reg_slot].bitfield.class != ClassNone);
10412
10413       if (i.imm_operands == 0)
10414         {
10415           /* When there is no immediate operand, generate an 8bit
10416              immediate operand to encode the first operand.  */
10417           exp = &im_expressions[i.imm_operands++];
10418           i.op[i.operands].imms = exp;
10419           i.types[i.operands].bitfield.imm8 = 1;
10420           i.operands++;
10421
10422           exp->X_op = O_constant;
10423         }
10424       else
10425         {
10426           gas_assert (i.imm_operands == 1);
10427           gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
10428           gas_assert (!i.tm.opcode_modifier.immext);
10429
10430           /* Turn on Imm8 again so that output_imm will generate it.  */
10431           i.types[0].bitfield.imm8 = 1;
10432
10433           exp = i.op[0].imms;
10434         }
            /* Merge the register number into the immediate: shifted left by
               3 when the template or the requested encoding is EVEX, by 4
               otherwise.  */
10435       exp->X_add_number |= register_number (i.op[reg_slot].regs)
10436                            << (3 + !(i.tm.opcode_modifier.evex
10437                                      || i.encoding == encoding_evex));
10438     }
10439
10440   switch (i.tm.opcode_modifier.vexvvvv)
10441     {
10442     /* VEX.vvvv encodes the last source register operand.  */
10443     case VexVVVV_SRC2:
10444       if (source != op)
10445         {
10446           v = source++;
10447           break;
10448         }
10449       /* For vprot*, vpshl*, and vpsha*, XOP.W controls the swapping of src1
10450          and src2, and it requires fall through when the operands are swapped.
10451        */
10452       /* Fall through.  */
10453     /* VEX.vvvv encodes the first source register operand.  */
10454     case VexVVVV_SRC1:
10455       v =  dest - 1;
10456       break;
10457     /* VEX.vvvv encodes the destination register operand.  */
10458     case VexVVVV_DST:
10459       v = dest--;
10460       break;
10461     default:
10462       v = ~0;
10463       break;
10464      }
10465
        /* From here on ~0 is the "no such operand" marker for SOURCE, DEST
           and V; it is what the `< MAX_OPERANDS' tests below look for.  With
           only one operand left it is kept as SOURCE.  */
10466   if (dest == source)
10467     dest = ~0;
10468
10469   gas_assert (source < dest);
10470
10471   if (v < MAX_OPERANDS)
10472     {
10473       gas_assert (i.tm.opcode_modifier.vexvvvv);
10474       i.vex.register_specifier = i.op[v].regs;
10475     }
10476
        /* An operand eligible for ModRM.rm was found above: encode it as a
           memory reference if the insn has a memory operand, else as a
           register (mode 3).  */
10477   if (op < i.operands)
10478     {
10479       if (i.mem_operands)
10480         {
10481           unsigned int fake_zero_displacement = 0;
10482
10483           gas_assert (i.flags[op] & Operand_Mem);
10484
10485           if (i.tm.opcode_modifier.sib)
10486             {
10487               /* The index register of VSIB shouldn't be RegIZ.  */
10488               if (i.tm.opcode_modifier.sib != SIBMEM
10489                   && i.index_reg->reg_num == RegIZ)
10490                 abort ();
10491
10492               i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
10493               if (!i.base_reg)
10494                 {
10495                   i.sib.base = NO_BASE_REGISTER;
10496                   i.sib.scale = i.log2_scale_factor;
10497                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
10498                   i.types[op].bitfield.disp32 = 1;
10499                 }
10500
10501               /* Since the mandatory SIB always has index register, so
10502                  the code logic remains unchanged. The non-mandatory SIB
10503                  without index register is allowed and will be handled
10504                  later.  */
10505               if (i.index_reg)
10506                 {
10507                   if (i.index_reg->reg_num == RegIZ)
10508                     i.sib.index = NO_INDEX_REGISTER;
10509                   else
10510                     i.sib.index = i.index_reg->reg_num;
10511                   set_rex_vrex (i.index_reg, REX_X, false);
10512                 }
10513             }
10514
10515           default_seg = reg_ds;
10516
10517           if (i.base_reg == 0)
10518             {
10519               i.rm.mode = 0;
10520               if (!i.disp_operands)
10521                 fake_zero_displacement = 1;
10522               if (i.index_reg == 0)
10523                 {
10524                   /* Both check for VSIB and mandatory non-vector SIB. */
10525                   gas_assert (!i.tm.opcode_modifier.sib
10526                               || i.tm.opcode_modifier.sib == SIBMEM);
10527                   /* Operand is just <disp>  */
10528                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
10529                   if (flag_code == CODE_64BIT)
10530                     {
10531                       /* 64bit mode overwrites the 32bit absolute
10532                          addressing by RIP relative addressing and
10533                          absolute addressing is encoded by one of the
10534                          redundant SIB forms.  */
10535                       i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
10536                       i.sib.base = NO_BASE_REGISTER;
10537                       i.sib.index = NO_INDEX_REGISTER;
10538                       i.types[op].bitfield.disp32 = 1;
10539                     }
10540                   else if ((flag_code == CODE_16BIT)
10541                            ^ (i.prefix[ADDR_PREFIX] != 0))
10542                     {
10543                       i.rm.regmem = NO_BASE_REGISTER_16;
10544                       i.types[op].bitfield.disp16 = 1;
10545                     }
10546                   else
10547                     {
10548                       i.rm.regmem = NO_BASE_REGISTER;
10549                       i.types[op].bitfield.disp32 = 1;
10550                     }
10551                 }
10552               else if (!i.tm.opcode_modifier.sib)
10553                 {
10554                   /* !i.base_reg && i.index_reg  */
10555                   if (i.index_reg->reg_num == RegIZ)
10556                     i.sib.index = NO_INDEX_REGISTER;
10557                   else
10558                     i.sib.index = i.index_reg->reg_num;
10559                   i.sib.base = NO_BASE_REGISTER;
10560                   i.sib.scale = i.log2_scale_factor;
10561                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
10562                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
10563                   i.types[op].bitfield.disp32 = 1;
10564                   set_rex_rex2 (i.index_reg, REX_X);
10565                 }
10566             }
10567           /* RIP addressing for 64bit mode.  */
10568           else if (i.base_reg->reg_num == RegIP)
10569             {
10570               gas_assert (!i.tm.opcode_modifier.sib);
10571               i.rm.regmem = NO_BASE_REGISTER;
10572               i.types[op].bitfield.disp8 = 0;
10573               i.types[op].bitfield.disp16 = 0;
10574               i.types[op].bitfield.disp32 = 1;
10575               i.types[op].bitfield.disp64 = 0;
10576               i.flags[op] |= Operand_PCrel;
10577               if (! i.disp_operands)
10578                 fake_zero_displacement = 1;
10579             }
10580           else if (i.base_reg->reg_type.bitfield.word)
10581             {
10582               gas_assert (!i.tm.opcode_modifier.sib);
10583               switch (i.base_reg->reg_num)
10584                 {
10585                 case 3: /* (%bx)  */
10586                   if (i.index_reg == 0)
10587                     i.rm.regmem = 7;
10588                   else /* (%bx,%si) -> 0, or (%bx,%di) -> 1  */
10589                     i.rm.regmem = i.index_reg->reg_num - 6;
10590                   break;
10591                 case 5: /* (%bp)  */
10592                   default_seg = reg_ss;
10593                   if (i.index_reg == 0)
10594                     {
10595                       i.rm.regmem = 6;
10596                       if (operand_type_check (i.types[op], disp) == 0)
10597                         {
10598                           /* fake (%bp) into 0(%bp)  */
10599                           if (i.disp_encoding == disp_encoding_16bit)
10600                             i.types[op].bitfield.disp16 = 1;
10601                           else
10602                             i.types[op].bitfield.disp8 = 1;
10603                           fake_zero_displacement = 1;
10604                         }
10605                     }
10606                   else /* (%bp,%si) -> 2, or (%bp,%di) -> 3  */
10607                     i.rm.regmem = i.index_reg->reg_num - 6 + 2;
10608                   break;
10609                 default: /* (%si) -> 4 or (%di) -> 5  */
10610                   i.rm.regmem = i.base_reg->reg_num - 6 + 4;
10611                 }
                    /* An explicitly requested displacement encoding forces a
                       (zero) displacement even if none was written.  */
10612               if (!fake_zero_displacement
10613                   && !i.disp_operands
10614                   && i.disp_encoding)
10615                 {
10616                   fake_zero_displacement = 1;
10617                   if (i.disp_encoding == disp_encoding_8bit)
10618                     i.types[op].bitfield.disp8 = 1;
10619                   else
10620                     i.types[op].bitfield.disp16 = 1;
10621                 }
10622               i.rm.mode = mode_from_disp_size (i.types[op]);
10623             }
10624           else /* i.base_reg and 32/64 bit mode  */
10625             {
10626               if (operand_type_check (i.types[op], disp))
10627                 {
10628                   i.types[op].bitfield.disp16 = 0;
10629                   i.types[op].bitfield.disp64 = 0;
10630                   i.types[op].bitfield.disp32 = 1;
10631                 }
10632
10633               if (!i.tm.opcode_modifier.sib)
10634                 i.rm.regmem = i.base_reg->reg_num;
10635               set_rex_rex2 (i.base_reg, REX_B);
10636               i.sib.base = i.base_reg->reg_num;
10637               /* x86-64 ignores REX prefix bit here to avoid decoder
10638                  complications.  */
10639               if (!(i.base_reg->reg_flags & RegRex)
10640                   && (i.base_reg->reg_num == EBP_REG_NUM
10641                    || i.base_reg->reg_num == ESP_REG_NUM))
10642                   default_seg = reg_ss;
10643               if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
10644                 {
10645                   fake_zero_displacement = 1;
10646                   if (i.disp_encoding == disp_encoding_32bit)
10647                     i.types[op].bitfield.disp32 = 1;
10648                   else
10649                     i.types[op].bitfield.disp8 = 1;
10650                 }
10651               i.sib.scale = i.log2_scale_factor;
10652               if (i.index_reg == 0)
10653                 {
10654                   /* Only check for VSIB. */
10655                   gas_assert (i.tm.opcode_modifier.sib != VECSIB128
10656                               && i.tm.opcode_modifier.sib != VECSIB256
10657                               && i.tm.opcode_modifier.sib != VECSIB512);
10658
10659                   /* <disp>(%esp) becomes two byte modrm with no index
10660                      register.  We've already stored the code for esp
10661                      in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
10662                      Any base register besides %esp will not use the
10663                      extra modrm byte.  */
10664                   i.sib.index = NO_INDEX_REGISTER;
10665                 }
10666               else if (!i.tm.opcode_modifier.sib)
10667                 {
10668                   if (i.index_reg->reg_num == RegIZ)
10669                     i.sib.index = NO_INDEX_REGISTER;
10670                   else
10671                     i.sib.index = i.index_reg->reg_num;
10672                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
10673                   set_rex_rex2 (i.index_reg, REX_X);
10674                 }
10675
10676               if (i.disp_operands
10677                   && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
10678                       || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
10679                 i.rm.mode = 0;
10680               else
10681                 {
10682                   if (!fake_zero_displacement
10683                       && !i.disp_operands
10684                       && i.disp_encoding)
10685                     {
10686                       fake_zero_displacement = 1;
10687                       if (i.disp_encoding == disp_encoding_8bit)
10688                         i.types[op].bitfield.disp8 = 1;
10689                       else
10690                         i.types[op].bitfield.disp32 = 1;
10691                     }
10692                   i.rm.mode = mode_from_disp_size (i.types[op]);
10693                 }
10694             }
10695
10696           if (fake_zero_displacement)
10697             {
10698               /* Fakes a zero displacement assuming that i.types[op]
10699                  holds the correct displacement size.  */
10700               expressionS *exp;
10701
10702               gas_assert (i.op[op].disps == 0);
10703               exp = &disp_expressions[i.disp_operands++];
10704               i.op[op].disps = exp;
10705               exp->X_op = O_constant;
10706               exp->X_add_number = 0;
10707               exp->X_add_symbol = (symbolS *) 0;
10708               exp->X_op_symbol = (symbolS *) 0;
10709             }
10710         }
10711     else
10712         {
            /* No memory operand: operand OP is a register and goes into
               ModRM.rm.  */
10713       i.rm.mode = 3;
10714       i.rm.regmem = i.op[op].regs->reg_num;
10715       set_rex_vrex (i.op[op].regs, REX_B, false);
10716         }
10717
            /* Operand OP has been consumed by ModRM.rm; make sure it isn't
               picked again for ModRM.reg below.  */
10718       if (op == dest)
10719         dest = ~0;
10720       if (op == source)
10721         source = ~0;
10722     }
10723   else
10724     {
            /* No operand may be a memory one: ModRM.rm holds a register,
               SOURCE by default or DEST when the template is marked RegMem.  */
10725       i.rm.mode = 3;
10726       if (!i.tm.opcode_modifier.regmem)
10727         {
10728           gas_assert (source < MAX_OPERANDS);
10729           i.rm.regmem = i.op[source].regs->reg_num;
10730           set_rex_vrex (i.op[source].regs, REX_B,
10731                         dest >= MAX_OPERANDS && i.tm.opcode_modifier.sse2avx);
10732           source = ~0;
10733         }
10734       else
10735         {
10736           gas_assert (dest < MAX_OPERANDS);
10737           i.rm.regmem = i.op[dest].regs->reg_num;
10738           set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
10739           dest = ~0;
10740         }
10741     }
10742
10743   /* Fill in i.rm.reg field with extension opcode (if any) or the
10744      appropriate register.  */
10745   if (i.tm.extension_opcode != None)
10746     i.rm.reg = i.tm.extension_opcode;
10747   else if (!i.tm.opcode_modifier.regmem && dest < MAX_OPERANDS)
10748     {
10749       i.rm.reg = i.op[dest].regs->reg_num;
10750       set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
10751     }
10752   else
10753     {
10754       gas_assert (source < MAX_OPERANDS);
10755       i.rm.reg = i.op[source].regs->reg_num;
10756       set_rex_vrex (i.op[source].regs, REX_R, false);
10757     }
10758
        /* REX.R got requested outside of 64-bit mode.  The assertion limits
           this to a control register operand; the bit is dropped and a LOCK
           prefix is added in its place.  NOTE(review): presumably this is the
           LOCK-prefixed form of accessing %cr8 - confirm.  */
10759   if (flag_code != CODE_64BIT && (i.rex & REX_R))
10760     {
10761       gas_assert (i.types[!i.tm.opcode_modifier.regmem].bitfield.class == RegCR);
10762       i.rex &= ~REX_R;
10763       add_prefix (LOCK_PREFIX_OPCODE);
10764     }
10765
10766   return default_seg;
10767 }
10768
10769 static INLINE void
10770 frag_opcode_byte (unsigned char byte)
10771 {
10772   if (now_seg != absolute_section)
10773     FRAG_APPEND_1_CHAR (byte);
10774   else
10775     ++abs_section_offset;
10776 }
10777
10778 static unsigned int
10779 flip_code16 (unsigned int code16)
10780 {
10781   gas_assert (i.tm.operands == 1);
10782
10783   return !(i.prefix[REX_PREFIX] & REX_W)
10784          && (code16 ? i.tm.operand_types[0].bitfield.disp32
10785                     : i.tm.operand_types[0].bitfield.disp16)
10786          ? CODE16 : 0;
10787 }
10788
10789 static void
10790 output_branch (void)
10791 {
10792   char *p;
10793   int size;
10794   int code16;
10795   int prefix;
10796   relax_substateT subtype;
10797   symbolS *sym;
10798   offsetT off;
10799
10800   if (now_seg == absolute_section)
10801     {
10802       as_bad (_("relaxable branches not supported in absolute section"));
10803       return;
10804     }
10805
10806   code16 = flag_code == CODE_16BIT ? CODE16 : 0;
10807   size = i.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
10808
10809   prefix = 0;
10810   if (i.prefix[DATA_PREFIX] != 0)
10811     {
10812       prefix = 1;
10813       i.prefixes -= 1;
10814       code16 ^= flip_code16(code16);
10815     }
10816   /* Pentium4 branch hints.  */
10817   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
10818       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
10819     {
10820       prefix++;
10821       i.prefixes--;
10822     }
10823   if (i.prefix[REX_PREFIX] != 0)
10824     {
10825       prefix++;
10826       i.prefixes--;
10827     }
10828
10829   /* BND prefixed jump.  */
10830   if (i.prefix[BND_PREFIX] != 0)
10831     {
10832       prefix++;
10833       i.prefixes--;
10834     }
10835
10836   if (i.prefixes != 0)
10837     as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
10838
10839   /* It's always a symbol;  End frag & setup for relax.
10840      Make sure there is enough room in this frag for the largest
10841      instruction we may generate in md_convert_frag.  This is 2
10842      bytes for the opcode and room for the prefix and largest
10843      displacement.  */
10844   frag_grow (prefix + 2 + 4);
10845   /* Prefix and 1 opcode byte go in fr_fix.  */
10846   p = frag_more (prefix + 1);
10847   if (i.prefix[DATA_PREFIX] != 0)
10848     *p++ = DATA_PREFIX_OPCODE;
10849   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
10850       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
10851     *p++ = i.prefix[SEG_PREFIX];
10852   if (i.prefix[BND_PREFIX] != 0)
10853     *p++ = BND_PREFIX_OPCODE;
10854   if (i.prefix[REX_PREFIX] != 0)
10855     *p++ = i.prefix[REX_PREFIX];
10856   *p = i.tm.base_opcode;
10857
10858   if ((unsigned char) *p == JUMP_PC_RELATIVE)
10859     subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
10860   else if (cpu_arch_flags.bitfield.cpui386)
10861     subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
10862   else
10863     subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
10864   subtype |= code16;
10865
10866   sym = i.op[0].disps->X_add_symbol;
10867   off = i.op[0].disps->X_add_number;
10868
10869   if (i.op[0].disps->X_op != O_constant
10870       && i.op[0].disps->X_op != O_symbol)
10871     {
10872       /* Handle complex expressions.  */
10873       sym = make_expr_symbol (i.op[0].disps);
10874       off = 0;
10875     }
10876
10877   /* 1 possible extra opcode + 4 byte displacement go in var part.
10878      Pass reloc in fr_var.  */
10879   frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
10880 }
10881
10882 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10883 /* Return TRUE iff PLT32 relocation should be used for branching to
10884    symbol S.  */
10885
10886 static bool
10887 need_plt32_p (symbolS *s)
10888 {
10889   /* PLT32 relocation is ELF only.  */
10890   if (!IS_ELF)
10891     return false;
10892
10893 #ifdef TE_SOLARIS
10894   /* Don't emit PLT32 relocation on Solaris: neither native linker nor
10895      krtld support it.  */
10896   return false;
10897 #endif
10898
10899   /* Since there is no need to prepare for PLT branch on x86-64, we
10900      can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
10901      be used as a marker for 32-bit PC-relative branches.  */
10902   if (!object_64bit)
10903     return false;
10904
10905   if (s == NULL)
10906     return false;
10907
10908   /* Weak or undefined symbol need PLT32 relocation.  */
10909   if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
10910     return true;
10911
10912   /* Non-global symbol doesn't need PLT32 relocation.  */
10913   if (! S_IS_EXTERNAL (s))
10914     return false;
10915
10916   /* Other global symbols need PLT32 relocation.  NB: Symbol with
10917      non-default visibilities are treated as normal global symbol
10918      so that PLT32 relocation can be used as a marker for 32-bit
10919      PC-relative branches.  It is useful for linker relaxation.  */
10920   return true;
10921 }
10922 #endif
10923
10924 static void
10925 output_jump (void)
10926 {
10927   char *p;
10928   int size;
10929   fixS *fixP;
10930   bfd_reloc_code_real_type jump_reloc = i.reloc[0];
10931
10932   if (i.tm.opcode_modifier.jump == JUMP_BYTE)
10933     {
10934       /* This is a loop or jecxz type instruction.  */
10935       size = 1;
10936       if (i.prefix[ADDR_PREFIX] != 0)
10937         {
10938           frag_opcode_byte (ADDR_PREFIX_OPCODE);
10939           i.prefixes -= 1;
10940         }
10941       /* Pentium4 branch hints.  */
10942       if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
10943           || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
10944         {
10945           frag_opcode_byte (i.prefix[SEG_PREFIX]);
10946           i.prefixes--;
10947         }
10948     }
10949   else
10950     {
10951       int code16;
10952
10953       code16 = 0;
10954       if (flag_code == CODE_16BIT)
10955         code16 = CODE16;
10956
10957       if (i.prefix[DATA_PREFIX] != 0)
10958         {
10959           frag_opcode_byte (DATA_PREFIX_OPCODE);
10960           i.prefixes -= 1;
10961           code16 ^= flip_code16(code16);
10962         }
10963
10964       size = 4;
10965       if (code16)
10966         size = 2;
10967     }
10968
10969   /* BND prefixed jump.  */
10970   if (i.prefix[BND_PREFIX] != 0)
10971     {
10972       frag_opcode_byte (i.prefix[BND_PREFIX]);
10973       i.prefixes -= 1;
10974     }
10975
10976   if (i.prefix[REX_PREFIX] != 0)
10977     {
10978       frag_opcode_byte (i.prefix[REX_PREFIX]);
10979       i.prefixes -= 1;
10980     }
10981
10982   if (i.prefixes != 0)
10983     as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
10984
10985   if (now_seg == absolute_section)
10986     {
10987       abs_section_offset += i.opcode_length + size;
10988       return;
10989     }
10990
10991   p = frag_more (i.opcode_length + size);
10992   switch (i.opcode_length)
10993     {
10994     case 2:
10995       *p++ = i.tm.base_opcode >> 8;
10996       /* Fall through.  */
10997     case 1:
10998       *p++ = i.tm.base_opcode;
10999       break;
11000     default:
11001       abort ();
11002     }
11003
11004 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11005   if (flag_code == CODE_64BIT && size == 4
11006       && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
11007       && need_plt32_p (i.op[0].disps->X_add_symbol))
11008     jump_reloc = BFD_RELOC_X86_64_PLT32;
11009 #endif
11010
11011   jump_reloc = reloc (size, 1, 1, jump_reloc);
11012
11013   fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
11014                       i.op[0].disps, 1, jump_reloc);
11015
11016   /* All jumps handled here are signed, but don't unconditionally use a
11017      signed limit check for 32 and 16 bit jumps as we want to allow wrap
11018      around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
11019      respectively.  */
11020   switch (size)
11021     {
11022     case 1:
11023       fixP->fx_signed = 1;
11024       break;
11025
11026     case 2:
11027       if (i.tm.mnem_off == MN_xbegin)
11028         fixP->fx_signed = 1;
11029       break;
11030
11031     case 4:
11032       if (flag_code == CODE_64BIT)
11033         fixP->fx_signed = 1;
11034       break;
11035     }
11036 }
11037
11038 static void
11039 output_interseg_jump (void)
11040 {
11041   char *p;
11042   int size;
11043   int prefix;
11044   int code16;
11045
11046   code16 = 0;
11047   if (flag_code == CODE_16BIT)
11048     code16 = CODE16;
11049
11050   prefix = 0;
11051   if (i.prefix[DATA_PREFIX] != 0)
11052     {
11053       prefix = 1;
11054       i.prefixes -= 1;
11055       code16 ^= CODE16;
11056     }
11057
11058   gas_assert (!i.prefix[REX_PREFIX]);
11059
11060   size = 4;
11061   if (code16)
11062     size = 2;
11063
11064   if (i.prefixes != 0)
11065     as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
11066
11067   if (now_seg == absolute_section)
11068     {
11069       abs_section_offset += prefix + 1 + 2 + size;
11070       return;
11071     }
11072
11073   /* 1 opcode; 2 segment; offset  */
11074   p = frag_more (prefix + 1 + 2 + size);
11075
11076   if (i.prefix[DATA_PREFIX] != 0)
11077     *p++ = DATA_PREFIX_OPCODE;
11078
11079   if (i.prefix[REX_PREFIX] != 0)
11080     *p++ = i.prefix[REX_PREFIX];
11081
11082   *p++ = i.tm.base_opcode;
11083   if (i.op[1].imms->X_op == O_constant)
11084     {
11085       offsetT n = i.op[1].imms->X_add_number;
11086
11087       if (size == 2
11088           && !fits_in_unsigned_word (n)
11089           && !fits_in_signed_word (n))
11090         {
11091           as_bad (_("16-bit jump out of range"));
11092           return;
11093         }
11094       md_number_to_chars (p, n, size);
11095     }
11096   else
11097     fix_new_exp (frag_now, p - frag_now->fr_literal, size,
11098                  i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
11099
11100   p += size;
11101   if (i.op[0].imms->X_op == O_constant)
11102     md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
11103   else
11104     fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
11105                  i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
11106 }
11107
11108 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      /* Emit the x86 GNU property note.  Returns without doing anything
         unless the output is ELF and x86_used_note is set.  Otherwise sets
         GNU_PROPERTY_X86_FEATURE_2_X86 in x86_feature_2_used, creates the
         .note.gnu.property section and writes a single NT_GNU_PROPERTY_TYPE_0
         note into it.  The note carries two properties, in this order:
         GNU_PROPERTY_X86_ISA_1_USED (value x86_isa_1_used) and
         GNU_PROPERTY_X86_FEATURE_2_USED (value x86_feature_2_used).  Each
         property is 12 bytes, rounded up to a multiple of 8 for ELFCLASS64
         output and of 4 otherwise; the rounding bytes are zeroed.  Finally
         the section/subsection that was current on entry is re-selected,
         but only when both values are non-zero.  */
11109 void
11110 x86_cleanup (void)
11111 {
11112   char *p;
11113   asection *seg = now_seg;
11114   subsegT subseg = now_subseg;
11115   asection *sec;
11116   unsigned int alignment, align_size_1;
11117   unsigned int isa_1_descsz, feature_2_descsz, descsz;
11118   unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
11119   unsigned int padding;
11120
11121   if (!IS_ELF || !x86_used_note)
11122     return;
11123
11124   x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
11125
11126   /* The .note.gnu.property section layout:
11127
11128      Field      Length          Contents
11129      ----       ----            ----
11130      n_namsz    4               4
11131      n_descsz   4               The note descriptor size
11132      n_type     4               NT_GNU_PROPERTY_TYPE_0
11133      n_name     4               "GNU"
11134      n_desc     n_descsz        The program property array
11135      ....       ....            ....
11136    */
11137
11138   /* Create the .note.gnu.property section.  */
11139   sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
11140   bfd_set_section_flags (sec,
11141                          (SEC_ALLOC
11142                           | SEC_LOAD
11143                           | SEC_DATA
11144                           | SEC_HAS_CONTENTS
11145                           | SEC_READONLY));
11146
        /* ALIGNMENT is the value handed to bfd_set_section_alignment below;
           ALIGN_SIZE_1 is the matching round-up mask applied to the property
           sizes (7 rounds up to multiples of 8, 3 to multiples of 4).  */
11147   if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
11148     {
11149       align_size_1 = 7;
11150       alignment = 3;
11151     }
11152   else
11153     {
11154       align_size_1 = 3;
11155       alignment = 2;
11156     }
11157
11158   bfd_set_section_alignment (sec, alignment);
11159   elf_section_type (sec) = SHT_NOTE;
11160
11161   /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
11162                                   + 4-byte data  */
11163   isa_1_descsz_raw = 4 + 4 + 4;
11164   /* Align GNU_PROPERTY_X86_ISA_1_USED.  */
11165   isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
11166
        /* The FEATURE_2 sizes are cumulative: they start from the aligned
           size of the preceding ISA_1 property.  */
11167   feature_2_descsz_raw = isa_1_descsz;
11168   /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
11169                                       + 4-byte data  */
11170   feature_2_descsz_raw += 4 + 4 + 4;
11171   /* Align GNU_PROPERTY_X86_FEATURE_2_USED.  */
11172   feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
11173                       & ~align_size_1);
11174
11175   descsz = feature_2_descsz;
11176   /* Section size: n_namsz + n_descsz + n_type + n_name + n_desc
           (the latter being DESCSZ bytes long).  */
11177   p = frag_more (4 + 4 + 4 + 4 + descsz);
11178
11179   /* Write n_namsz.  */
11180   md_number_to_chars (p, (valueT) 4, 4);
11181
11182   /* Write n_descsz.  */
11183   md_number_to_chars (p + 4, (valueT) descsz, 4);
11184
11185   /* Write n_type.  */
11186   md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
11187
11188   /* Write n_name: the three letters plus the terminating NUL.  */
11189   memcpy (p + 4 * 3, "GNU", 4);
11190
        /* First property (ISA_1_USED); it starts right after the four
           4-byte header fields written above.  */
11191   /* Write 4-byte type.  */
11192   md_number_to_chars (p + 4 * 4,
11193                       (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
11194
11195   /* Write 4-byte data size.  */
11196   md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
11197
11198   /* Write 4-byte data.  */
11199   md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
11200
11201   /* Zero out paddings (the bytes following the 12 bytes just written,
           up to the aligned size of the property).  */
11202   padding = isa_1_descsz - isa_1_descsz_raw;
11203   if (padding)
11204     memset (p + 4 * 7, 0, padding);
11205
        /* Second property (FEATURE_2_USED); it is placed ISA_1_DESCSZ bytes
           after the start of the first one.  */
11206   /* Write 4-byte type.  */
11207   md_number_to_chars (p + isa_1_descsz + 4 * 4,
11208                       (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
11209
11210   /* Write 4-byte data size.  */
11211   md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
11212
11213   /* Write 4-byte data.  */
11214   md_number_to_chars (p + isa_1_descsz + 4 * 6,
11215                       (valueT) x86_feature_2_used, 4);
11216
11217   /* Zero out paddings (from the end of the second property's 12 bytes
           up to the end of the descriptor).  */
11218   padding = feature_2_descsz - feature_2_descsz_raw;
11219   if (padding)
11220     memset (p + isa_1_descsz + 4 * 7, 0, padding);
11221
11222   /* We probably can't restore the current segment, for there likely
11223      isn't one yet...  */
11224   if (seg && subseg)
11225     subseg_set (seg, subseg);
11226 }
11227
      /* Return true if SFrame stack trace information is supported for the
         ABI currently selected in x86_elf_abi, false otherwise.  */
11228 bool
11229 x86_support_sframe_p (void)
11230 {
11231   /* At this time, SFrame stack trace is supported for AMD64 ABI only.  */
11232   return (x86_elf_abi == X86_64_ABI);
11233 }
11234
      /* Return whether the return address has to be tracked explicitly in
         SFrame Frame Row Entries.  This implementation always answers
         false, for the reason given in the body.  */
11235 bool
11236 x86_sframe_ra_tracking_p (void)
11237 {
11238   /* In AMD64, return address is always stored on the stack at a fixed offset
11239      from the CFA (provided via x86_sframe_cfa_ra_offset ()).
11240      Do not track explicitly via an SFrame Frame Row Entry.  */
11241   return false;
11242 }
11243
      /* Return the fixed offset, relative to the CFA, at which the return
         address is stored: -8.  Asserts that x86_elf_abi is X86_64_ABI, so
         it must only be called when that ABI is in effect.  */
11244 offsetT
11245 x86_sframe_cfa_ra_offset (void)
11246 {
11247   gas_assert (x86_elf_abi == X86_64_ABI);
11248   return (offsetT) -8;
11249 }
11250
      /* Return the SFrame ABI/arch identifier for the current target:
         SFRAME_ABI_AMD64_ENDIAN_LITTLE when x86_support_sframe_p () holds,
         0 otherwise.  */
11251 unsigned char
11252 x86_sframe_get_abi_arch (void)
11253 {
11254   unsigned char sframe_abi_arch = 0;
11255
11256   if (x86_support_sframe_p ())
11257     {
            /* The only identifier returned here is a little-endian one, so
               a big-endian target would be inconsistent.  */
11258       gas_assert (!target_big_endian);
11259       sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
11260     }
11261
11262   return sframe_abi_arch;
11263 }
11264
11265 #endif
11266
      /* Return the number of bytes between offset START_OFF within
         START_FRAG and FRAG_NOW_PTR, which points into the literal area
         (fr_literal) of frag_now.  output_insn uses this to measure the
         length of the instruction it has just emitted.

         If START_FRAG is not frag_now, the fixed sizes (fr_fix) of
         START_FRAG and of every following frag up to, but not including,
         frag_now are added up first; only fr_fix is counted for those
         frags.  */
11267 static unsigned int
11268 encoding_length (const fragS *start_frag, offsetT start_off,
11269                  const char *frag_now_ptr)
11270 {
11271   unsigned int len = 0;
11272
11273   if (start_frag != frag_now)
11274     {
11275       const fragS *fr = start_frag;
11276
            /* The walk also stops if the chain ends (fr_next is NULL)
               before frag_now is reached.  */
11277       do {
11278         len += fr->fr_fix;
11279         fr = fr->fr_next;
11280       } while (fr && fr != frag_now);
11281     }
11282
        /* Drop the part of START_FRAG that precedes the start offset and add
           what has been placed into frag_now so far.  */
11283   return len - start_off + (frag_now_ptr - frag_now->fr_literal);
11284 }
11285
11286 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
11287    be macro-fused with conditional jumps.
11288    NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
11289    or is one of the following format:
11290
11291     cmp m, imm
11292     add m, imm
11293     sub m, imm
11294    test m, imm
11295     and m, imm
11296     inc m
11297     dec m
11298
11299    it is unfusible.

         The instruction examined is the one described by the global `i'.
         Whenever one of the opcode groups below matches, *MF_CMP_P is set
         to the corresponding macro-fusion kind; this also happens for the
         memory/immediate forms listed above, for which 0 is then returned.
         *MF_CMP_P is left untouched when no group matches and for the RIP
         relative / non-base-opcode-space early exits.  */
11300
11301 static int
11302 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
11303 {
11304   /* No RIP address.  */
11305   if (i.base_reg && i.base_reg->reg_num == RegIP)
11306     return 0;
11307
11308   /* No opcodes outside of base encoding space.  */
11309   if (i.tm.opcode_space != SPACE_BASE)
11310     return 0;
11311
11312   /* add, sub without add/sub m, imm.  */
        /* (base_opcode | 3) == 0x83 matches base opcodes 0x80 ... 0x83; for
           those the operation is told apart by extension_opcode, here and
           in the "and" and "cmp" checks further down.  */
11313   if (i.tm.base_opcode <= 5
11314       || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
11315       || ((i.tm.base_opcode | 3) == 0x83
11316           && (i.tm.extension_opcode == 0x5
11317               || i.tm.extension_opcode == 0x0)))
11318     {
11319       *mf_cmp_p = mf_cmp_alu_cmp;
11320       return !(i.mem_operands && i.imm_operands);
11321     }
11322
11323   /* and without and m, imm.  */
11324   if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
11325       || ((i.tm.base_opcode | 3) == 0x83
11326           && i.tm.extension_opcode == 0x4))
11327     {
11328       *mf_cmp_p = mf_cmp_test_and;
11329       return !(i.mem_operands && i.imm_operands);
11330     }
11331
11332   /* test without test m imm.  */
        /* The "| 1" comparisons accept both members of an opcode pair:
           0x84/0x85, 0xa8/0xa9 and 0xf6/0xf7.  */
11333   if ((i.tm.base_opcode | 1) == 0x85
11334       || (i.tm.base_opcode | 1) == 0xa9
11335       || ((i.tm.base_opcode | 1) == 0xf7
11336           && i.tm.extension_opcode == 0))
11337     {
11338       *mf_cmp_p = mf_cmp_test_and;
11339       return !(i.mem_operands && i.imm_operands);
11340     }
11341
11342   /* cmp without cmp m, imm.  */
11343   if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
11344       || ((i.tm.base_opcode | 3) == 0x83
11345           && (i.tm.extension_opcode == 0x7)))
11346     {
11347       *mf_cmp_p = mf_cmp_alu_cmp;
11348       return !(i.mem_operands && i.imm_operands);
11349     }
11350
11351   /* inc, dec without inc/dec m.   */
        /* Base opcodes 0x40 ... 0x4f count only for templates marked
           CpuNo64; otherwise 0xfe/0xff with extension_opcode 0 or 1.  Unlike
           the groups above, any memory operand makes these unfusible.  */
11352   if ((is_cpu (&i.tm, CpuNo64)
11353        && (i.tm.base_opcode | 0xf) == 0x4f)
11354       || ((i.tm.base_opcode | 1) == 0xff
11355           && i.tm.extension_opcode <= 0x1))
11356     {
11357       *mf_cmp_p = mf_cmp_incdec;
11358       return !i.mem_operands;
11359     }
11360
11361   return 0;
11362 }
11363
11364 /* Return 1 if a FUSED_JCC_PADDING frag should be generated.

         That is the case when branch alignment is enabled
         (align_branch_power is non-zero and align_branch includes
         align_branch_fused_bit), the current section is not the absolute
         section, the selected architecture includes i386, the current
         instruction passes maybe_fused_with_jcc_p (which may store its
         macro-fusion kind through MF_CMP_P), and LAST_INSN->kind is
         last_insn_other.  If only the last condition fails, 0 is returned
         and, with flag_debug set, a warning is issued at the location
         recorded in LAST_INSN.  */
11365
11366 static int
11367 add_fused_jcc_padding_frag_p (enum mf_cmp_kind *mf_cmp_p,
11368                               const struct last_insn *last_insn)
11369 {
11370   /* NB: Don't work with COND_JUMP86 without i386.  */
11371   if (!align_branch_power
11372       || now_seg == absolute_section
11373       || !cpu_arch_flags.bitfield.cpui386
11374       || !(align_branch & align_branch_fused_bit))
11375     return 0;
11376
11377   if (maybe_fused_with_jcc_p (mf_cmp_p))
11378     {
11379       if (last_insn->kind == last_insn_other)
11380         return 1;
            /* The preceding insn is of a kind that prevents padding here;
               report that when debugging output was requested.  */
11381       if (flag_debug)
11382         as_warn_where (last_insn->file, last_insn->line,
11383                        _("`%s` skips -malign-branch-boundary on `%s`"),
11384                        last_insn->name, insn_name (&i.tm));
11385     }
11386
11387   return 0;
11388 }
11389
11390 /* Return 1 if a BRANCH_PREFIX frag should be generated.

         This requires align_branch_power and align_branch_prefix_size to
         be non-zero, the current section not to be the absolute section,
         the selected architecture to include i386, and the current
         instruction (global `i') to have operands and to be neither a
         PadLock instruction nor a prefix.  In addition LAST_INSN->kind must
         be last_insn_other; if only that fails, 0 is returned and, with
         flag_debug set, a warning is issued at the location recorded in
         LAST_INSN.  */
11391
11392 static int
11393 add_branch_prefix_frag_p (const struct last_insn *last_insn)
11394 {
11395   /* NB: Don't work with COND_JUMP86 without i386.  Don't add prefix
11396      to PadLock instructions since they include prefixes in opcode.  */
11397   if (!align_branch_power
11398       || !align_branch_prefix_size
11399       || now_seg == absolute_section
11400       || is_cpu (&i.tm, CpuPadLock)
11401       || !cpu_arch_flags.bitfield.cpui386)
11402     return 0;
11403
11404   /* Don't add prefix if it is a prefix or there is no operand in case
11405      that segment prefix is special.  */
11406   if (!i.operands || i.tm.opcode_modifier.isprefix)
11407     return 0;
11408
11409   if (last_insn->kind == last_insn_other)
11410     return 1;
11411
11412   if (flag_debug)
11413     as_warn_where (last_insn->file, last_insn->line,
11414                    _("`%s` skips -malign-branch-boundary on `%s`"),
11415                    last_insn->name, insn_name (&i.tm));
11416
11417   return 0;
11418 }
11419
11420 /* Return 1 if a BRANCH_PADDING frag should be generated.

         The current instruction (global `i') is classified as direct jmp,
         jcc, near ret, direct call or indirect call/jmp, and the kind found
         is stored through BRANCH_P whenever one is recognized, i.e. also
         when no padding is requested for it.  For jcc the macro-fusion
         group of the condition is stored through MF_JCC_P.  Neither is
         written on the initial early exit or when the instruction is none
         of the above.  Padding is requested only if the corresponding bit
         is set in align_branch and LAST_INSN->kind is last_insn_other; in
         the latter failure case a warning is issued when flag_debug is
         set.  */
11421
11422 static int
11423 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
11424                            enum mf_jcc_kind *mf_jcc_p,
11425                            const struct last_insn *last_insn)
11426 {
11427   int add_padding;
11428
11429   /* NB: Don't work with COND_JUMP86 without i386.  */
11430   if (!align_branch_power
11431       || now_seg == absolute_section
11432       || !cpu_arch_flags.bitfield.cpui386
11433       || i.tm.opcode_space != SPACE_BASE)
11434     return 0;
11435
11436   add_padding = 0;
11437
11438   /* Check for jcc and direct jmp.  */
11439   if (i.tm.opcode_modifier.jump == JUMP)
11440     {
11441       if (i.tm.base_opcode == JUMP_PC_RELATIVE)
11442         {
11443           *branch_p = align_branch_jmp;
11444           add_padding = align_branch & align_branch_jmp_bit;
11445         }
11446       else
11447         {
11448           /* Because J<cc> and JN<cc> share same group in macro-fusible table,
11449              ignore the lowest bit.  */
11450           *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
11451           *branch_p = align_branch_jcc;
11452           if ((align_branch & align_branch_jcc_bit))
11453             add_padding = 1;
11454         }
11455     }
11456   else if ((i.tm.base_opcode | 1) == 0xc3)
11457     {
11458       /* Near ret.  */
11459       *branch_p = align_branch_ret;
11460       if ((align_branch & align_branch_ret_bit))
11461         add_padding = 1;
11462     }
11463   else
11464     {
11465       /* Check for indirect jmp, direct and indirect calls.  */
11466       if (i.tm.base_opcode == 0xe8)
11467         {
11468           /* Direct call.  */
11469           *branch_p = align_branch_call;
11470           if ((align_branch & align_branch_call_bit))
11471             add_padding = 1;
11472         }
11473       else if (i.tm.base_opcode == 0xff
11474                && (i.tm.extension_opcode == 2
11475                    || i.tm.extension_opcode == 4))
11476         {
11477           /* Indirect call and jmp.  */
11478           *branch_p = align_branch_indirect;
11479           if ((align_branch & align_branch_indirect_bit))
11480             add_padding = 1;
11481         }
11482
            /* Exception for calls/branches whose displacement refers to the
               symbol named by tls_get_addr, either directly or as a
               difference against GOT_symbol.  */
11483       if (add_padding
11484           && i.disp_operands
11485           && tls_get_addr
11486           && (i.op[0].disps->X_op == O_symbol
11487               || (i.op[0].disps->X_op == O_subtract
11488                   && i.op[0].disps->X_op_symbol == GOT_symbol)))
11489         {
11490           symbolS *s = i.op[0].disps->X_add_symbol;
11491           /* No padding to call to global or undefined tls_get_addr.  */
11492           if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
11493               && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
11494             return 0;
11495         }
11496     }
11497
        /* Padding was wanted, but the preceding insn is of a kind that
           rules it out.  */
11498   if (add_padding
11499       && last_insn->kind != last_insn_other)
11500     {
11501       if (flag_debug)
11502         as_warn_where (last_insn->file, last_insn->line,
11503                        _("`%s` skips -malign-branch-boundary on `%s`"),
11504                        last_insn->name, insn_name (&i.tm));
11505       return 0;
11506     }
11507
11508   return add_padding;
11509 }
11510
11511 static void
11512 output_insn (const struct last_insn *last_insn)
11513 {
11514   fragS *insn_start_frag;
11515   offsetT insn_start_off;
11516   fragS *fragP = NULL;
11517   enum align_branch_kind branch = align_branch_none;
11518   /* The initializer is arbitrary, just to avoid an uninitialized-variable
11519      error.  The variable is either assigned in add_branch_padding_frag_p
11520      or never used.  */
11521   enum mf_jcc_kind mf_jcc = mf_jcc_jo;
11522
11523 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11524   if (IS_ELF && x86_used_note && now_seg != absolute_section)
11525     {
11526       if ((i.xstate & xstate_tmm) == xstate_tmm
11527           || is_cpu (&i.tm, CpuAMX_TILE))
11528         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
11529
11530       if (is_cpu (&i.tm, Cpu8087)
11531           || is_cpu (&i.tm, Cpu287)
11532           || is_cpu (&i.tm, Cpu387)
11533           || is_cpu (&i.tm, Cpu687)
11534           || is_cpu (&i.tm, CpuFISTTP))
11535         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
11536
11537       if ((i.xstate & xstate_mmx)
11538           || i.tm.mnem_off == MN_emms
11539           || i.tm.mnem_off == MN_femms)
11540         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
11541
11542       if (i.index_reg)
11543         {
11544           if (i.index_reg->reg_type.bitfield.zmmword)
11545             i.xstate |= xstate_zmm;
11546           else if (i.index_reg->reg_type.bitfield.ymmword)
11547             i.xstate |= xstate_ymm;
11548           else if (i.index_reg->reg_type.bitfield.xmmword)
11549             i.xstate |= xstate_xmm;
11550         }
11551
11552       /* vzeroall / vzeroupper */
11553       if (i.tm.base_opcode == 0x77 && is_cpu (&i.tm, CpuAVX))
11554         i.xstate |= xstate_ymm;
11555
11556       if ((i.xstate & xstate_xmm)
11557           /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
11558           || (i.tm.base_opcode == 0xae
11559               && (is_cpu (&i.tm, CpuSSE)
11560                   || is_cpu (&i.tm, CpuAVX)))
11561           || is_cpu (&i.tm, CpuWideKL)
11562           || is_cpu (&i.tm, CpuKL))
11563         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
11564
11565       if ((i.xstate & xstate_ymm) == xstate_ymm)
11566         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
11567       if ((i.xstate & xstate_zmm) == xstate_zmm)
11568         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
11569       if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
11570         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
11571       if (is_cpu (&i.tm, CpuFXSR))
11572         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
11573       if (is_cpu (&i.tm, CpuXsave))
11574         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
11575       if (is_cpu (&i.tm, CpuXsaveopt))
11576         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
11577       if (is_cpu (&i.tm, CpuXSAVEC))
11578         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
11579
11580       if (x86_feature_2_used
11581           || is_cpu (&i.tm, CpuCMOV)
11582           || is_cpu (&i.tm, CpuSYSCALL)
11583           || i.tm.mnem_off == MN_cmpxchg8b)
11584         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
11585       if (is_cpu (&i.tm, CpuSSE3)
11586           || is_cpu (&i.tm, CpuSSSE3)
11587           || is_cpu (&i.tm, CpuSSE4_1)
11588           || is_cpu (&i.tm, CpuSSE4_2)
11589           || is_cpu (&i.tm, CpuCX16)
11590           || is_cpu (&i.tm, CpuPOPCNT)
11591           /* LAHF-SAHF insns in 64-bit mode.  */
11592           || (flag_code == CODE_64BIT
11593               && (i.tm.base_opcode | 1) == 0x9f
11594               && i.tm.opcode_space == SPACE_BASE))
11595         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
11596       if (is_cpu (&i.tm, CpuAVX)
11597           || is_cpu (&i.tm, CpuAVX2)
11598           /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
11599              XOP, FMA4, LWP, TBM, and AMX.  */
11600           || (i.tm.opcode_modifier.vex
11601               && !is_cpu (&i.tm, CpuAVX512F)
11602               && !is_cpu (&i.tm, CpuAVX512BW)
11603               && !is_cpu (&i.tm, CpuAVX512DQ)
11604               && !is_cpu (&i.tm, CpuXOP)
11605               && !is_cpu (&i.tm, CpuFMA4)
11606               && !is_cpu (&i.tm, CpuLWP)
11607               && !is_cpu (&i.tm, CpuTBM)
11608               && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
11609           || is_cpu (&i.tm, CpuF16C)
11610           || is_cpu (&i.tm, CpuFMA)
11611           || is_cpu (&i.tm, CpuLZCNT)
11612           || is_cpu (&i.tm, CpuMovbe)
11613           || is_cpu (&i.tm, CpuXSAVES)
11614           || (x86_feature_2_used
11615               & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
11616                  | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
11617                  | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
11618         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
11619       if (is_cpu (&i.tm, CpuAVX512F)
11620           || is_cpu (&i.tm, CpuAVX512BW)
11621           || is_cpu (&i.tm, CpuAVX512DQ)
11622           || is_cpu (&i.tm, CpuAVX512VL)
11623           /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
11624              AVX512-4FMAPS, and AVX512-4VNNIW.  */
11625           || (i.tm.opcode_modifier.evex
11626               && !is_cpu (&i.tm, CpuAVX512ER)
11627               && !is_cpu (&i.tm, CpuAVX512PF)
11628               && !is_cpu (&i.tm, CpuAVX512_4FMAPS)
11629               && !is_cpu (&i.tm, CpuAVX512_4VNNIW)))
11630         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
11631     }
11632 #endif
11633
11634   /* Tie dwarf2 debug info to the address at the start of the insn.
11635      We can't do this after the insn has been output as the current
11636      frag may have been closed off.  eg. by frag_var.  */
11637   dwarf2_emit_insn (0);
11638
11639   insn_start_frag = frag_now;
11640   insn_start_off = frag_now_fix ();
11641
11642   if (add_branch_padding_frag_p (&branch, &mf_jcc, last_insn))
11643     {
11644       char *p;
11645       /* Branch can be 8 bytes.  Leave some room for prefixes.  */
11646       unsigned int max_branch_padding_size = 14;
11647
11648       /* Align section to boundary.  */
11649       record_alignment (now_seg, align_branch_power);
11650
11651       /* Make room for padding.  */
11652       frag_grow (max_branch_padding_size);
11653
11654       /* Start of the padding.  */
11655       p = frag_more (0);
11656
11657       fragP = frag_now;
11658
11659       frag_var (rs_machine_dependent, max_branch_padding_size, 0,
11660                 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
11661                 NULL, 0, p);
11662
11663       fragP->tc_frag_data.mf_type = mf_jcc;
11664       fragP->tc_frag_data.branch_type = branch;
11665       fragP->tc_frag_data.max_bytes = max_branch_padding_size;
11666     }
11667
11668   if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
11669       && !pre_386_16bit_warned)
11670     {
11671       as_warn (_("use .code16 to ensure correct addressing mode"));
11672       pre_386_16bit_warned = true;
11673     }
11674
11675   /* Output jumps.  */
11676   if (i.tm.opcode_modifier.jump == JUMP)
11677     output_branch ();
11678   else if (i.tm.opcode_modifier.jump == JUMP_BYTE
11679            || i.tm.opcode_modifier.jump == JUMP_DWORD)
11680     output_jump ();
11681   else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
11682     output_interseg_jump ();
11683   else
11684     {
11685       /* Output normal instructions here.  */
11686       char *p;
11687       unsigned char *q;
11688       unsigned int j;
11689       enum mf_cmp_kind mf_cmp;
11690
11691       if (avoid_fence
11692           && (i.tm.base_opcode == 0xaee8
11693               || i.tm.base_opcode == 0xaef0
11694               || i.tm.base_opcode == 0xaef8))
11695         {
11696           /* Encode lfence, mfence, and sfence as
11697              f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
11698           if (flag_code == CODE_16BIT)
11699             as_bad (_("Cannot convert `%s' in 16-bit mode"), insn_name (&i.tm));
11700           else if (omit_lock_prefix)
11701             as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
11702                     insn_name (&i.tm));
11703           else if (now_seg != absolute_section)
11704             {
11705               offsetT val = 0x240483f0ULL;
11706
11707               p = frag_more (5);
11708               md_number_to_chars (p, val, 5);
11709             }
11710           else
11711             abs_section_offset += 5;
11712           return;
11713         }
11714
11715       /* Some processors fail on LOCK prefix. This option makes the
11716          assembler ignore LOCK prefix and serves as a workaround.  */
11717       if (omit_lock_prefix)
11718         {
11719           if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
11720               && i.tm.opcode_modifier.isprefix)
11721             return;
11722           i.prefix[LOCK_PREFIX] = 0;
11723         }
11724
11725       if (branch)
11726         /* Skip if this is a branch.  */
11727         ;
11728       else if (add_fused_jcc_padding_frag_p (&mf_cmp, last_insn))
11729         {
11730           /* Make room for padding.  */
11731           frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
11732           p = frag_more (0);
11733
11734           fragP = frag_now;
11735
11736           frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
11737                     ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
11738                     NULL, 0, p);
11739
11740           fragP->tc_frag_data.mf_type = mf_cmp;
11741           fragP->tc_frag_data.branch_type = align_branch_fused;
11742           fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
11743         }
11744       else if (add_branch_prefix_frag_p (last_insn))
11745         {
11746           unsigned int max_prefix_size = align_branch_prefix_size;
11747
11748           /* Make room for padding.  */
11749           frag_grow (max_prefix_size);
11750           p = frag_more (0);
11751
11752           fragP = frag_now;
11753
11754           frag_var (rs_machine_dependent, max_prefix_size, 0,
11755                     ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
11756                     NULL, 0, p);
11757
11758           fragP->tc_frag_data.max_bytes = max_prefix_size;
11759         }
11760
11761       /* Since the VEX/EVEX prefix contains the implicit prefix, we
11762          don't need the explicit prefix.  */
11763       if (!is_any_vex_encoding (&i.tm))
11764         {
11765           switch (i.tm.opcode_modifier.opcodeprefix)
11766             {
11767             case PREFIX_0X66:
11768               add_prefix (0x66);
11769               break;
11770             case PREFIX_0XF2:
11771               add_prefix (0xf2);
11772               break;
11773             case PREFIX_0XF3:
11774               if (!is_cpu (&i.tm, CpuPadLock)
11775                   || (i.prefix[REP_PREFIX] != 0xf3))
11776                 add_prefix (0xf3);
11777               break;
11778             case PREFIX_NONE:
11779               switch (i.opcode_length)
11780                 {
11781                 case 2:
11782                   break;
11783                 case 1:
11784                   /* Check for pseudo prefixes.  */
11785                   if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
11786                     break;
11787                   as_bad_where (insn_start_frag->fr_file,
11788                                 insn_start_frag->fr_line,
11789                                 _("pseudo prefix without instruction"));
11790                   return;
11791                 default:
11792                   abort ();
11793                 }
11794               break;
11795             default:
11796               abort ();
11797             }
11798
11799 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
11800           /* For x32, add a dummy REX_OPCODE prefix for mov/add with
11801              R_X86_64_GOTTPOFF relocation so that linker can safely
11802              perform IE->LE optimization.  A dummy REX_OPCODE prefix
11803              is also needed for lea with R_X86_64_GOTPC32_TLSDESC
11804              relocation for GDesc -> IE/LE optimization.  */
11805           if (x86_elf_abi == X86_64_X32_ABI
11806               && !is_apx_rex2_encoding ()
11807               && i.operands == 2
11808               && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
11809                   || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
11810               && i.prefix[REX_PREFIX] == 0)
11811             add_prefix (REX_OPCODE);
11812 #endif
11813
11814           /* The prefix bytes.  */
11815           for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
11816             if (*q)
11817               frag_opcode_byte (*q);
11818
11819           if (is_apx_rex2_encoding ())
11820             {
11821               frag_opcode_byte (i.vex.bytes[0]);
11822               frag_opcode_byte (i.vex.bytes[1]);
11823             }
11824         }
11825       else
11826         {
11827           for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
11828             if (*q)
11829               switch (j)
11830                 {
11831                 case SEG_PREFIX:
11832                 case ADDR_PREFIX:
11833                   frag_opcode_byte (*q);
11834                   break;
11835                 default:
11836                   /* There should be no other prefixes for instructions
11837                      with VEX prefix.  */
11838                   abort ();
11839                 }
11840
11841           /* For EVEX instructions i.vrex should become 0 after
11842              build_evex_prefix.  For VEX instructions upper 16 registers
11843              aren't available, so VREX should be 0.  */
11844           if (i.vrex)
11845             abort ();
11846           /* Now the VEX prefix.  */
11847           if (now_seg != absolute_section)
11848             {
11849               p = frag_more (i.vex.length);
11850               for (j = 0; j < i.vex.length; j++)
11851                 p[j] = i.vex.bytes[j];
11852             }
11853           else
11854             abs_section_offset += i.vex.length;
11855         }
11856
11857       /* Now the opcode; be careful about word order here!  */
11858       j = i.opcode_length;
11859       if (!i.vex.length)
11860         switch (i.tm.opcode_space)
11861           {
11862           case SPACE_BASE:
11863             break;
11864           case SPACE_0F:
11865             ++j;
11866             break;
11867           case SPACE_0F38:
11868           case SPACE_0F3A:
11869             j += 2;
11870             break;
11871           default:
11872             abort ();
11873           }
11874
11875       if (now_seg == absolute_section)
11876         abs_section_offset += j;
11877       else if (j == 1)
11878         {
11879           FRAG_APPEND_1_CHAR (i.tm.base_opcode);
11880         }
11881       else
11882         {
11883           p = frag_more (j);
11884           if (!i.vex.length
11885               && i.tm.opcode_space != SPACE_BASE)
11886             {
11887               *p++ = 0x0f;
11888               if (i.tm.opcode_space != SPACE_0F)
11889                 *p++ = i.tm.opcode_space == SPACE_0F38
11890                        ? 0x38 : 0x3a;
11891             }
11892
11893           switch (i.opcode_length)
11894             {
11895             case 2:
11896               /* Put out high byte first: can't use md_number_to_chars!  */
11897               *p++ = (i.tm.base_opcode >> 8) & 0xff;
11898               /* Fall through.  */
11899             case 1:
11900               *p = i.tm.base_opcode & 0xff;
11901               break;
11902             default:
11903               abort ();
11904               break;
11905             }
11906
11907         }
11908
11909       /* Now the modrm byte and sib byte (if present).  */
11910       if (i.tm.opcode_modifier.modrm)
11911         {
11912           frag_opcode_byte ((i.rm.regmem << 0)
11913                              | (i.rm.reg << 3)
11914                              | (i.rm.mode << 6));
11915           /* If i.rm.regmem == ESP (4)
11916              && i.rm.mode != (Register mode)
11917              && not 16 bit
11918              ==> need second modrm byte.  */
11919           if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
11920               && i.rm.mode != 3
11921               && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
11922             frag_opcode_byte ((i.sib.base << 0)
11923                               | (i.sib.index << 3)
11924                               | (i.sib.scale << 6));
11925         }
11926
11927       if (i.disp_operands)
11928         output_disp (insn_start_frag, insn_start_off);
11929
11930       if (i.imm_operands)
11931         output_imm (insn_start_frag, insn_start_off);
11932
11933       /*
11934        * frag_now_fix () returning plain abs_section_offset when we're in the
11935        * absolute section, and abs_section_offset not getting updated as data
11936        * gets added to the frag breaks the logic below.
11937        */
11938       if (now_seg != absolute_section)
11939         {
11940           j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
11941           if (j > 15)
11942             {
11943               if (dot_insn ())
11944                 as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
11945                         j);
11946               else
11947                 as_bad (_("instruction length of %u bytes exceeds the limit of 15"),
11948                         j);
11949             }
11950           else if (fragP)
11951             {
11952               /* NB: Don't add prefix with GOTPC relocation since
11953                  output_disp() above depends on the fixed encoding
11954                  length.  Can't add prefix with TLS relocation since
11955                  it breaks TLS linker optimization.  */
11956               unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
11957               /* Prefix count on the current instruction.  */
11958               unsigned int count = i.vex.length;
11959               unsigned int k;
11960               for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
11961                 /* REX byte is encoded in VEX/EVEX prefix.  */
11962                 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
11963                   count++;
11964
11965               /* Count prefixes for extended opcode maps.  */
11966               if (!i.vex.length)
11967                 switch (i.tm.opcode_space)
11968                   {
11969                   case SPACE_BASE:
11970                     break;
11971                   case SPACE_0F:
11972                     count++;
11973                     break;
11974                   case SPACE_0F38:
11975                   case SPACE_0F3A:
11976                     count += 2;
11977                     break;
11978                   default:
11979                     abort ();
11980                   }
11981
11982               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
11983                   == BRANCH_PREFIX)
11984                 {
11985                   /* Set the maximum prefix size in BRANCH_PREFIX
11986                      frag.  */
11987                   if (fragP->tc_frag_data.max_bytes > max)
11988                     fragP->tc_frag_data.max_bytes = max;
11989                   if (fragP->tc_frag_data.max_bytes > count)
11990                     fragP->tc_frag_data.max_bytes -= count;
11991                   else
11992                     fragP->tc_frag_data.max_bytes = 0;
11993                 }
11994               else
11995                 {
11996                   /* Remember the maximum prefix size in FUSED_JCC_PADDING
11997                      frag.  */
11998                   unsigned int max_prefix_size;
11999                   if (align_branch_prefix_size > max)
12000                     max_prefix_size = max;
12001                   else
12002                     max_prefix_size = align_branch_prefix_size;
12003                   if (max_prefix_size > count)
12004                     fragP->tc_frag_data.max_prefix_length
12005                       = max_prefix_size - count;
12006                 }
12007
12008               /* Use existing segment prefix if possible.  Use CS
12009                  segment prefix in 64-bit mode.  In 32-bit mode, use SS
12010                  segment prefix with ESP/EBP base register and use DS
12011                  segment prefix without ESP/EBP base register.  */
12012               if (i.prefix[SEG_PREFIX])
12013                 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
12014               else if (flag_code == CODE_64BIT)
12015                 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
12016               else if (i.base_reg
12017                        && (i.base_reg->reg_num == 4
12018                            || i.base_reg->reg_num == 5))
12019                 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
12020               else
12021                 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
12022             }
12023         }
12024     }
12025
12026   /* NB: Don't work with COND_JUMP86 without i386.  */
12027   if (align_branch_power
12028       && now_seg != absolute_section
12029       && cpu_arch_flags.bitfield.cpui386)
12030     {
12031       /* Terminate each frag so that we can add prefix and check for
12032          fused jcc.  */
12033       frag_wane (frag_now);
12034       frag_new (0);
12035     }
12036
12037 #ifdef DEBUG386
12038   if (flag_debug)
12039     {
12040       pi ("" /*line*/, &i);
12041     }
12042 #endif /* DEBUG386  */
12043 }
12044
12045 /* Return the size of the displacement operand N.  */
12046
12047 static int
12048 disp_size (unsigned int n)
12049 {
12050   int size = 4;
12051
12052   if (i.types[n].bitfield.disp64)
12053     size = 8;
12054   else if (i.types[n].bitfield.disp8)
12055     size = 1;
12056   else if (i.types[n].bitfield.disp16)
12057     size = 2;
12058   return size;
12059 }
12060
12061 /* Return the size of the immediate operand N.  */
12062
12063 static int
12064 imm_size (unsigned int n)
12065 {
12066   int size = 4;
12067   if (i.types[n].bitfield.imm64)
12068     size = 8;
12069   else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
12070     size = 1;
12071   else if (i.types[n].bitfield.imm16)
12072     size = 2;
12073   return size;
12074 }
12075
/* Emit the displacement field of every operand of the current insn
   (the global `i') that has one.

   In the absolute section only abs_section_offset is advanced.
   Constant displacements are range-checked and written directly into
   the frag.  For anything else space is reserved in the frag and a
   fixup is created, with the relocation type (and some target specific
   fixup bits) selected below.

   INSN_START_FRAG and INSN_START_OFF are only handed on to
   encoding_length () when a 32-bit object GOTPC addend needs to be
   adjusted.  */
static void
output_disp (fragS *insn_start_frag, offsetT insn_start_off)
{
  char *p;
  unsigned int n;

  for (n = 0; n < i.operands; n++)
    {
      if (operand_type_check (i.types[n], disp))
        {
          int size = disp_size (n);

          /* Nothing is emitted in the absolute section; just account
             for the bytes.  */
          if (now_seg == absolute_section)
            abs_section_offset += size;
          else if (i.op[n].disps->X_op == O_constant)
            {
              offsetT val = i.op[n].disps->X_add_number;

              /* A 1-byte displacement is stored shifted right by
                 i.memshift; wider ones are stored unchanged.  */
              val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
                                     size);
              p = frag_more (size);
              md_number_to_chars (p, val, size);
            }
          else
            {
              enum bfd_reloc_code_real reloc_type;
              bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
              /* Signedness argument for reloc (): always set for PC
                 relative operands, otherwise only for certain 4-byte
                 displacements in 64-bit code.  */
              bool sign = (flag_code == CODE_64BIT && size == 4
                           && (!want_disp32 (&i.tm)
                               || (i.tm.opcode_modifier.jump && !i.jumpabsolute
                                   && !i.types[n].bitfield.baseindex)))
                          || pcrel;
              fixS *fixP;

              /* We can't have 8 bit displacement here.  */
              gas_assert (!i.types[n].bitfield.disp8);

              /* The PC relative address is computed relative
                 to the instruction boundary, so in case immediate
                 fields follow, we need to adjust the value.  */
              if (pcrel && i.imm_operands)
                {
                  unsigned int n1;
                  int sz = 0;

                  /* Sum up the sizes of all immediates of this insn.  */
                  for (n1 = 0; n1 < i.operands; n1++)
                    if (operand_type_check (i.types[n1], imm))
                      {
                        /* Only one immediate is allowed for PC
                           relative address, except with .insn.  */
                        gas_assert (sz == 0 || dot_insn ());
                        sz += imm_size (n1);
                      }
                  /* We should find at least one immediate.  */
                  gas_assert (sz != 0);
                  i.op[n].disps->X_add_number -= sz;
                }

              p = frag_more (size);
              reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
              /* A reference to the GOT symbol itself (plain, or in the
                 "GOT + (sym - sym)" form for the non-PC-relative reloc
                 types) is turned into a GOTPC relocation.  */
              if (GOT_symbol
                  && GOT_symbol == i.op[n].disps->X_add_symbol
                  && (((reloc_type == BFD_RELOC_32
                        || reloc_type == BFD_RELOC_X86_64_32S
                        || (reloc_type == BFD_RELOC_64
                            && object_64bit))
                       && (i.op[n].disps->X_op == O_symbol
                           || (i.op[n].disps->X_op == O_add
                               && ((symbol_get_value_expression
                                    (i.op[n].disps->X_op_symbol)->X_op)
                                   == O_subtract))))
                      || reloc_type == BFD_RELOC_32_PCREL))
                {
                  if (!object_64bit)
                    {
                      reloc_type = BFD_RELOC_386_GOTPC;
                      i.has_gotpc_tls_reloc = true;
                      /* Add what encoding_length () reports for the
                         span from the insn start up to this field.  */
                      i.op[n].disps->X_add_number +=
                        encoding_length (insn_start_frag, insn_start_off, p);
                    }
                  else if (reloc_type == BFD_RELOC_64)
                    reloc_type = BFD_RELOC_X86_64_GOTPC64;
                  else
                    /* Don't do the adjustment for x86-64, as there
                       the pcrel addressing is relative to the _next_
                       insn, and that is taken care of in other code.  */
                    reloc_type = BFD_RELOC_X86_64_GOTPC32;
                }
              else if (align_branch_power)
                {
                  /* With branch alignment enabled, record that this
                     insn carries one of the TLS relocations listed
                     here.  */
                  switch (reloc_type)
                    {
                    case BFD_RELOC_386_TLS_GD:
                    case BFD_RELOC_386_TLS_LDM:
                    case BFD_RELOC_386_TLS_IE:
                    case BFD_RELOC_386_TLS_IE_32:
                    case BFD_RELOC_386_TLS_GOTIE:
                    case BFD_RELOC_386_TLS_GOTDESC:
                    case BFD_RELOC_386_TLS_DESC_CALL:
                    case BFD_RELOC_X86_64_TLSGD:
                    case BFD_RELOC_X86_64_TLSLD:
                    case BFD_RELOC_X86_64_GOTTPOFF:
                    case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
                    case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
                    case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
                    case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
                    case BFD_RELOC_X86_64_TLSDESC_CALL:
                      i.has_gotpc_tls_reloc = true;
                      /* Fall through.  */
                    default:
                      break;
                    }
                }
              fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
                                  size, i.op[n].disps, pcrel,
                                  reloc_type);

              /* 4-byte PC relative fixups in 64-bit code are signed
                 unless an address size prefix is in use.  */
              if (flag_code == CODE_64BIT && size == 4 && pcrel
                  && !i.prefix[ADDR_PREFIX])
                fixP->fx_signed = 1;

              if (reloc_type == BFD_RELOC_X86_64_GOTTPOFF
                  && i.tm.opcode_space == SPACE_EVEXMAP4)
                {
                  /* Only "add %reg1, foo@gottpoff(%rip), %reg2" is
                     allowed in md_assemble.  Set fx_tcbit2 for EVEX
                     prefix.  */
                  fixP->fx_tcbit2 = 1;
                  continue;
                }

              if (i.base_reg && i.base_reg->reg_num == RegIP)
                {
                  if (reloc_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
                    {
                      /* Set fx_tcbit for REX2 prefix.  */
                      if (is_apx_rex2_encoding ())
                        fixP->fx_tcbit = 1;
                      continue;
                    }
                }
              /* In 64-bit, i386_validate_fix updates only (%rip)
                 relocations.  */
              else if (object_64bit)
                continue;

              /* Check for "call/jmp *mem", "mov mem, %reg",
                 "test %reg, mem" and "binop mem, %reg" where binop
                 is one of adc, add, and, cmp, or, sbb, sub, xor
                 instructions without data prefix.  Always generate
                 R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
              if (i.prefix[DATA_PREFIX] == 0
                  && (i.rm.mode == 2
                      || (i.rm.mode == 0 && i.rm.regmem == 5))
                  && i.tm.opcode_space == SPACE_BASE
                  && ((i.operands == 1
                       && i.tm.base_opcode == 0xff
                       && (i.rm.reg == 2 || i.rm.reg == 4))
                      || (i.operands == 2
                          && (i.tm.base_opcode == 0x8b
                              || i.tm.base_opcode == 0x85
                              || (i.tm.base_opcode & ~0x38) == 0x03))))
                {
                  /* Tag the fixup; which bit is used depends on the
                     object format and on the REX/REX2 prefix in use.  */
                  if (object_64bit)
                    {
                      if (reloc_type == BFD_RELOC_X86_64_GOTTPOFF)
                        {
                          /* Set fx_tcbit for REX2 prefix.  */
                          if (is_apx_rex2_encoding ())
                            fixP->fx_tcbit = 1;
                        }
                      else if (generate_relax_relocations)
                        {
                          /* Set fx_tcbit3 for REX2 prefix.  */
                          if (is_apx_rex2_encoding ())
                            fixP->fx_tcbit3 = 1;
                          else if (i.rex)
                            fixP->fx_tcbit2 = 1;
                          else
                            fixP->fx_tcbit = 1;
                        }
                    }
                  else if (generate_relax_relocations
                           || (i.rm.mode == 0 && i.rm.regmem == 5))
                    fixP->fx_tcbit2 = 1;
                }
            }
        }
    }
}
12265
12266 static void
12267 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
12268 {
12269   char *p;
12270   unsigned int n;
12271
12272   for (n = 0; n < i.operands; n++)
12273     {
12274       if (operand_type_check (i.types[n], imm))
12275         {
12276           int size = imm_size (n);
12277
12278           if (now_seg == absolute_section)
12279             abs_section_offset += size;
12280           else if (i.op[n].imms->X_op == O_constant)
12281             {
12282               offsetT val;
12283
12284               val = offset_in_range (i.op[n].imms->X_add_number,
12285                                      size);
12286               p = frag_more (size);
12287               md_number_to_chars (p, val, size);
12288             }
12289           else
12290             {
12291               /* Not absolute_section.
12292                  Need a 32-bit fixup (don't support 8bit
12293                  non-absolute imms).  Try to support other
12294                  sizes ...  */
12295               enum bfd_reloc_code_real reloc_type;
12296               int sign;
12297
12298               if (i.types[n].bitfield.imm32s
12299                   && (i.suffix == QWORD_MNEM_SUFFIX
12300                       || (!i.suffix && i.tm.opcode_modifier.no_lsuf)
12301                       || (i.prefix[REX_PREFIX] & REX_W)
12302                       || dot_insn ()))
12303                 sign = 1;
12304               else
12305                 sign = 0;
12306
12307               p = frag_more (size);
12308               reloc_type = reloc (size, 0, sign, i.reloc[n]);
12309
12310               /*   This is tough to explain.  We end up with this one if we
12311                * have operands that look like
12312                * "_GLOBAL_OFFSET_TABLE_+[.-.L284]".  The goal here is to
12313                * obtain the absolute address of the GOT, and it is strongly
12314                * preferable from a performance point of view to avoid using
12315                * a runtime relocation for this.  The actual sequence of
12316                * instructions often look something like:
12317                *
12318                *        call    .L66
12319                * .L66:
12320                *        popl    %ebx
12321                *        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
12322                *
12323                *   The call and pop essentially return the absolute address
12324                * of the label .L66 and store it in %ebx.  The linker itself
12325                * will ultimately change the first operand of the addl so
12326                * that %ebx points to the GOT, but to keep things simple, the
12327                * .o file must have this operand set so that it generates not
12328                * the absolute address of .L66, but the absolute address of
12329                * itself.  This allows the linker itself simply treat a GOTPC
12330                * relocation as asking for a pcrel offset to the GOT to be
12331                * added in, and the addend of the relocation is stored in the
12332                * operand field for the instruction itself.
12333                *
12334                *   Our job here is to fix the operand so that it would add
12335                * the correct offset so that %ebx would point to itself.  The
12336                * thing that is tricky is that .-.L66 will point to the
12337                * beginning of the instruction, so we need to further modify
12338                * the operand so that it will point to itself.  There are
12339                * other cases where you have something like:
12340                *
12341                *        .long   $_GLOBAL_OFFSET_TABLE_+[.-.L66]
12342                *
12343                * and here no correction would be required.  Internally in
12344                * the assembler we treat operands of this form as not being
12345                * pcrel since the '.' is explicitly mentioned, and I wonder
12346                * whether it would simplify matters to do it this way.  Who
12347                * knows.  In earlier versions of the PIC patches, the
12348                * pcrel_adjust field was used to store the correction, but
12349                * since the expression is not pcrel, I felt it would be
12350                * confusing to do it this way.  */
12351
12352               if ((reloc_type == BFD_RELOC_32
12353                    || reloc_type == BFD_RELOC_X86_64_32S
12354                    || reloc_type == BFD_RELOC_64)
12355                   && GOT_symbol
12356                   && GOT_symbol == i.op[n].imms->X_add_symbol
12357                   && (i.op[n].imms->X_op == O_symbol
12358                       || (i.op[n].imms->X_op == O_add
12359                           && ((symbol_get_value_expression
12360                                (i.op[n].imms->X_op_symbol)->X_op)
12361                               == O_subtract))))
12362                 {
12363                   if (!object_64bit)
12364                     reloc_type = BFD_RELOC_386_GOTPC;
12365                   else if (size == 4)
12366                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
12367                   else if (size == 8)
12368                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
12369                   i.has_gotpc_tls_reloc = true;
12370                   i.op[n].imms->X_add_number +=
12371                     encoding_length (insn_start_frag, insn_start_off, p);
12372                 }
12373               fix_new_exp (frag_now, p - frag_now->fr_literal, size,
12374                            i.op[n].imms, 0, reloc_type);
12375             }
12376         }
12377     }
12378 }
12379 \f
12380 /* x86_cons_fix_new is called via the expression parsing code when a
12381    reloc is needed.  We use this hook to get the correct .got reloc.  */
12382 static int cons_sign = -1;
12383
12384 void
12385 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
12386                   expressionS *exp, bfd_reloc_code_real_type r)
12387 {
12388   r = reloc (len, 0, cons_sign, r);
12389
12390 #ifdef TE_PE
12391   if (exp->X_op == O_secrel)
12392     {
12393       exp->X_op = O_symbol;
12394       r = BFD_RELOC_32_SECREL;
12395     }
12396   else if (exp->X_op == O_secidx)
12397     r = BFD_RELOC_16_SECIDX;
12398 #endif
12399
12400   fix_new_exp (frag, off, len, exp, 0, r);
12401 }
12402
12403 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
12404    purpose of the `.dc.a' internal pseudo-op.  */
12405
12406 int
12407 x86_address_bytes (void)
12408 {
12409   if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
12410     return 4;
12411   return stdoutput->arch_info->bits_per_address / 8;
12412 }
12413
#if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
     || defined (LEX_AT)) && !defined (TE_PE)
# define lex_got(reloc, adjust, types) NULL
#else
/* Parse operands of the form
   <symbol>@GOTOFF+<nnn>
   and similar .plt or .got references.

   The text at input_line_pointer is scanned up to the first '@'; the
   scan gives up (returning NULL) at an end-of-line character or a
   comma.  The token following the '@' is then looked up, ignoring
   case, in the table below.

   On a match that is supported by the output format, store the
   relocation in *REL, set up *TYPES if TYPES is non-null, and return
   a malloc'd copy of the input up to and including the terminating
   end-of-line character or comma, with the `@GOTOFF' part taken out.
   If ADJUST is non-null, set it to the length of the string that was
   removed from the input line.  The caller owns the returned buffer.

   Return NULL if there is nothing to do, or - after issuing a
   diagnostic - if the token is not supported with the output
   format.  */
static char *
lex_got (enum bfd_reloc_code_real *rel,
         int *adjust,
         i386_operand_type *types)
{
  /* Some of the relocations depend on the size of what field is to
     be relocated.  But in our callers i386_immediate and i386_displacement
     we don't yet know the operand size (this will be set by insn
     matching).  Hence we record the word32 relocation here,
     and adjust the reloc according to the real size in reloc().  */
  static const struct
  {
    const char *str;
    int len;
    const enum bfd_reloc_code_real rel[2];
    const i386_operand_type types64;
    bool need_GOT_symbol;
  }
    gotrel[] =
  {

#define OPERAND_TYPE_IMM32_32S_DISP32 { .bitfield = \
  { .imm32 = 1, .imm32s = 1, .disp32 = 1 } }
#define OPERAND_TYPE_IMM32_32S_64_DISP32 { .bitfield = \
  { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1 } }
#define OPERAND_TYPE_IMM32_32S_64_DISP32_64 { .bitfield = \
  { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1, .disp64 = 1 } }
#define OPERAND_TYPE_IMM64_DISP64 { .bitfield = \
  { .imm64 = 1, .disp64 = 1 } }

#ifndef TE_PE
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
    { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
                                        BFD_RELOC_SIZE32 },
      { .bitfield = { .imm32 = 1, .imm64 = 1 } }, false },
#endif
    { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_PLTOFF64 },
      { .bitfield = { .imm64 = 1 } }, true },
    { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
                                       BFD_RELOC_X86_64_PLT32    },
      OPERAND_TYPE_IMM32_32S_DISP32, false },
    { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_GOTPLT64 },
      OPERAND_TYPE_IMM64_DISP64, true },
    { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
                                       BFD_RELOC_X86_64_GOTOFF64 },
      OPERAND_TYPE_IMM64_DISP64, true },
    { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_GOTPCREL },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
                                       BFD_RELOC_X86_64_TLSGD    },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE, true },
    { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_TLSLD    },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
                                       BFD_RELOC_X86_64_GOTTPOFF },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
                                       BFD_RELOC_X86_64_TPOFF32  },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
    { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE, true },
    { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
                                       BFD_RELOC_X86_64_DTPOFF32 },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
    { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE, true },
    { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE, true },
    { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
                                       BFD_RELOC_X86_64_GOT32    },
      OPERAND_TYPE_IMM32_32S_64_DISP32, true },
    { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
                                       BFD_RELOC_X86_64_GOTPC32_TLSDESC },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
                                       BFD_RELOC_X86_64_TLSDESC_CALL },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
#else /* TE_PE */
    { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
                                       BFD_RELOC_32_SECREL },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
#endif

#undef OPERAND_TYPE_IMM32_32S_DISP32
#undef OPERAND_TYPE_IMM32_32S_64_DISP32
#undef OPERAND_TYPE_IMM32_32S_64_DISP32_64
#undef OPERAND_TYPE_IMM64_DISP64

  };
  char *at;
  unsigned int idx;

#if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
  if (!IS_ELF)
    return NULL;
#endif

  /* Find the '@' introducing the relocation token; there is nothing
     to do if the operand ends first.  */
  at = input_line_pointer;
  while (*at != '@')
    {
      if (is_end_of_line[(unsigned char) *at] || *at == ',')
        return NULL;
      at++;
    }

  for (idx = 0; idx < ARRAY_SIZE (gotrel); idx++)
    {
      int removed = gotrel[idx].len;
      int head_len, tail_len;
      char *copy, *tail, *stop;

      if (strncasecmp (at + 1, gotrel[idx].str, removed) != 0)
        continue;

      if (gotrel[idx].rel[object_64bit] == 0)
        {
          as_bad (_("@%s reloc is not supported with %d-bit output format"),
                  gotrel[idx].str, 1 << (5 + object_64bit));
          return NULL;
        }

      *rel = gotrel[idx].rel[object_64bit];

      if (types)
        {
          if (flag_code == CODE_64BIT)
            *types = gotrel[idx].types64;
          else
            {
              types->bitfield.imm32 = 1;
              types->bitfield.disp32 = 1;
            }
        }

      if (GOT_symbol == NULL && gotrel[idx].need_GOT_symbol)
        GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);

      /* The part of the input line in front of the '@'.  */
      head_len = at - input_line_pointer;

      /* The part after the reloc token, up to and including an
         end_of_line char or comma.  */
      tail = at + 1 + removed;
      for (stop = tail;
           !is_end_of_line[(unsigned char) *stop] && *stop != ',';
           stop++)
        ;
      tail_len = stop + 1 - tail;

      /* Allocate and copy string.  The trailing NUL shouldn't be
         necessary, but be safe.  */
      copy = XNEWVEC (char, head_len + tail_len + 2);
      memcpy (copy, input_line_pointer, head_len);
      if (tail_len != 0 && *tail != ' ')
        /* Put a ' ' where the relocation token was, so that errors
           like foo@GOTOFF1 will be detected.  */
        copy[head_len++] = ' ';
      else
        /* The token is removed without replacement; the '@' counts
           towards the removed length as well.  */
        removed++;
      if (adjust)
        *adjust = removed;
      memcpy (copy + head_len, tail, tail_len);
      copy[head_len + tail_len] = '\0';
      return copy;
    }

  /* Might be a symbol version string.  Don't as_bad here.  */
  return NULL;
}
#endif
12604
12605 bfd_reloc_code_real_type
12606 x86_cons (expressionS *exp, int size)
12607 {
12608   bfd_reloc_code_real_type got_reloc = NO_RELOC;
12609
12610   intel_syntax = -intel_syntax;
12611   exp->X_md = 0;
12612   expr_mode = expr_operator_none;
12613
12614 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
12615       && !defined (LEX_AT)) \
12616     || defined (TE_PE)
12617   if (size == 4 || (object_64bit && size == 8))
12618     {
12619       /* Handle @GOTOFF and the like in an expression.  */
12620       char *save;
12621       char *gotfree_input_line;
12622       int adjust = 0;
12623
12624       save = input_line_pointer;
12625       gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
12626       if (gotfree_input_line)
12627         input_line_pointer = gotfree_input_line;
12628
12629       expression (exp);
12630
12631       if (gotfree_input_line)
12632         {
12633           /* expression () has merrily parsed up to the end of line,
12634              or a comma - in the wrong buffer.  Transfer how far
12635              input_line_pointer has moved to the right buffer.  */
12636           input_line_pointer = (save
12637                                 + (input_line_pointer - gotfree_input_line)
12638                                 + adjust);
12639           free (gotfree_input_line);
12640           if (exp->X_op == O_constant
12641               || exp->X_op == O_absent
12642               || exp->X_op == O_illegal
12643               || exp->X_op == O_register
12644               || exp->X_op == O_big)
12645             {
12646               char c = *input_line_pointer;
12647               *input_line_pointer = 0;
12648               as_bad (_("missing or invalid expression `%s'"), save);
12649               *input_line_pointer = c;
12650             }
12651           else if ((got_reloc == BFD_RELOC_386_PLT32
12652                     || got_reloc == BFD_RELOC_X86_64_PLT32)
12653                    && exp->X_op != O_symbol)
12654             {
12655               char c = *input_line_pointer;
12656               *input_line_pointer = 0;
12657               as_bad (_("invalid PLT expression `%s'"), save);
12658               *input_line_pointer = c;
12659             }
12660         }
12661     }
12662   else
12663 #endif
12664     expression (exp);
12665
12666   intel_syntax = -intel_syntax;
12667
12668   if (intel_syntax)
12669     i386_intel_simplify (exp);
12670
12671   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
12672   if (size <= 4 && expr_mode == expr_operator_present
12673       && exp->X_op == O_constant && !object_64bit)
12674     exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
12675
12676   return got_reloc;
12677 }
12678
12679 static void
12680 signed_cons (int size)
12681 {
12682   if (object_64bit)
12683     cons_sign = 1;
12684   cons (size);
12685   cons_sign = -1;
12686 }
12687
      /* Handler for the ".insn" directive: emit an instruction whose encoding
         is described by the directive's arguments.  i.tm serves as the one and
         only template and is filled in along the way.  In order, the code
           - runs parse_insn() over the start of the statement,
           - accepts an optional VEX, XOP<nn>, or EVEX specifier together with
             its ".L", ".pp", ".mmmmm", and ".W" sub-fields,
           - looks for a "+r" or "/<digit>" suffix of the opcode expression,
           - evaluates the opcode expression and splits a leading
             DATA/REPE/REPNE prefix byte off of the value,
           - parses operands, if any,
           - splits a leading 0x0f / 0x0f38 / 0x0f3a escape off of the value,
           - brings operands into canonical order and derives operand size
             related settings from them,
           - builds the VEX/XOP/EVEX prefix (or calls establish_rex()) and
             outputs the insn via output_insn().
         There are two error exits: "bad" skips the remainder of the line,
         while "done" (also the normal exit) sets input_line_pointer to LINE
         and demands the rest of the line to be empty.  Both put back the
         character overwritten at the end of the statement and clear
         i.tm.mnem_off.  */
12688 static void
12689 s_insn (int dummy ATTRIBUTE_UNUSED)
12690 {
12691   char mnemonic[MAX_MNEM_SIZE], *line = input_line_pointer, *ptr;
12692   char *saved_ilp = find_end_of_line (line, false), saved_char;
12693   const char *end;
12694   unsigned int j;
12695   valueT val;
12696   bool vex = false, xop = false, evex = false;
12697   struct last_insn *last_insn;
12698
12699   init_globals ();
12700
        /* Temporarily terminate the statement at the position reported by
           find_end_of_line(); the overwritten character is put back at the
           "bad" and "done" labels.  */
12701   saved_char = *saved_ilp;
12702   *saved_ilp = 0;
12703
12704   end = parse_insn (line, mnemonic, true);
12705   if (end == NULL)
12706     {
12707   bad:
12708       *saved_ilp = saved_char;
12709       ignore_rest_of_line ();
12710       i.tm.mnem_off = 0;
12711       return;
12712     }
        /* I.e. line = end; END is const-qualified while LINE is not.  */
12713   line += end - line;
12714
        /* Use i.tm itself as the one and only template.  */
12715   current_templates.start = &i.tm;
12716   current_templates.end = &i.tm + 1;
12717   i.tm.mnem_off = MN__insn;
12718   i.tm.extension_opcode = None;
12719
12720   if (startswith (line, "VEX")
12721       && (line[3] == '.' || is_space_char (line[3])))
12722     {
12723       vex = true;
12724       line += 3;
12725     }
12726   else if (startswith (line, "XOP") && ISDIGIT (line[3]))
12727     {
12728       char *e;
12729       unsigned long n = strtoul (line + 3, &e, 16);
12730
            /* Exactly two characters need to have been consumed, and the value
               needs to be in the range 0x08...0x1f.  */
12731       if (e == line + 5 && n >= 0x08 && n <= 0x1f
12732           && (*e == '.' || is_space_char (*e)))
12733         {
12734           xop = true;
12735           /* Arrange for build_vex_prefix() to emit 0x8f.  */
12736           i.tm.opcode_space = SPACE_XOP08;
12737           i.insn_opcode_space = n;
12738           line = e;
12739         }
12740     }
12741   else if (startswith (line, "EVEX")
12742            && (line[4] == '.' || is_space_char (line[4])))
12743     {
12744       evex = true;
12745       line += 4;
12746     }
12747
        /* Reject a VEX/XOP specifier when i.encoding already is encoding_evex,
           an EVEX one when it is encoding_vex or encoding_vex3, and the absence
           of any specifier when it is anything other than encoding_default.  */
12748   if (vex || xop
12749       ? i.encoding == encoding_evex
12750       : evex
12751         ? i.encoding == encoding_vex
12752           || i.encoding == encoding_vex3
12753         : i.encoding != encoding_default)
12754     {
12755       as_bad (_("pseudo-prefix conflicts with encoding specifier"));
12756       goto bad;
12757     }
12758
        /* LINE is beyond END only if one of the specifiers above was
           accepted.  */
12759   if (line > end && i.encoding == encoding_default)
12760     i.encoding = evex ? encoding_evex : encoding_vex;
12761
12762   if (i.encoding != encoding_default)
12763     {
12764       /* Only address size and segment override prefixes are permitted with
12765          VEX/XOP/EVEX encodings.  */
12766       const unsigned char *p = i.prefix;
12767
12768       for (j = 0; j < ARRAY_SIZE (i.prefix); ++j, ++p)
12769         {
12770           if (!*p)
12771             continue;
12772
12773           switch (j)
12774             {
12775             case SEG_PREFIX:
12776             case ADDR_PREFIX:
12777               break;
12778             default:
12779                   as_bad (_("illegal prefix used with VEX/XOP/EVEX"));
12780                   goto bad;
12781             }
12782         }
12783     }
12784
12785   if (line > end && *line == '.')
12786     {
12787       /* Length specifier (VEX.L, XOP.L, EVEX.L'L).  */
12788       switch (line[1])
12789         {
12790         case 'L':
12791           switch (line[2])
12792             {
12793             case '0':
12794               if (evex)
12795                 i.tm.opcode_modifier.evex = EVEX128;
12796               else
12797                 i.tm.opcode_modifier.vex = VEX128;
12798               break;
12799
12800             case '1':
12801               if (evex)
12802                 i.tm.opcode_modifier.evex = EVEX256;
12803               else
12804                 i.tm.opcode_modifier.vex = VEX256;
12805               break;
12806
12807             case '2':
12808               if (evex)
12809                 i.tm.opcode_modifier.evex = EVEX512;
12810               break;
12811
12812             case '3':
12813               if (evex)
12814                 i.tm.opcode_modifier.evex = EVEX_L3;
12815               break;
12816
12817             case 'I':
12818               if (line[3] == 'G')
12819                 {
12820                   if (evex)
12821                     i.tm.opcode_modifier.evex = EVEXLIG;
12822                   else
12823                     i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
                        /* Skip one character here; together with the
                           "line += 3" below this covers all of ".LIG".  */
12824                   ++line;
12825                 }
12826               break;
12827             }
12828
12829           if (i.tm.opcode_modifier.vex || i.tm.opcode_modifier.evex)
12830             line += 3;
12831           break;
12832
12833         case '1':
12834           if (line[2] == '2' && line[3] == '8')
12835             {
12836               if (evex)
12837                 i.tm.opcode_modifier.evex = EVEX128;
12838               else
12839                 i.tm.opcode_modifier.vex = VEX128;
12840               line += 4;
12841             }
12842           break;
12843
12844         case '2':
12845           if (line[2] == '5' && line[3] == '6')
12846             {
12847               if (evex)
12848                 i.tm.opcode_modifier.evex = EVEX256;
12849               else
12850                 i.tm.opcode_modifier.vex = VEX256;
12851               line += 4;
12852             }
12853           break;
12854
12855         case '5':
12856           if (evex && line[2] == '1' && line[3] == '2')
12857             {
12858               i.tm.opcode_modifier.evex = EVEX512;
12859               line += 4;
12860             }
12861           break;
12862         }
12863     }
12864
12865   if (line > end && *line == '.')
12866     {
12867       /* embedded prefix (VEX.pp, XOP.pp, EVEX.pp).  */
12868       switch (line[1])
12869         {
12870         case 'N':
12871           if (line[2] == 'P')
12872             line += 3;
12873           break;
12874
12875         case '6':
12876           if (line[2] == '6')
12877             {
12878               i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
12879               line += 3;
12880             }
12881           break;
12882
12883         case 'F': case 'f':
12884           if (line[2] == '3')
12885             {
12886               i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
12887               line += 3;
12888             }
12889           else if (line[2] == '2')
12890             {
12891               i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
12892               line += 3;
12893             }
12894           break;
12895         }
12896     }
12897
12898   if (line > end && !xop && *line == '.')
12899     {
12900       /* Encoding space (VEX.mmmmm, EVEX.mmmm).  */
12901       switch (line[1])
12902         {
12903         case '0':
12904           if (TOUPPER (line[2]) != 'F')
12905             break;
12906           if (line[3] == '.' || is_space_char (line[3]))
12907             {
12908               i.insn_opcode_space = SPACE_0F;
12909               line += 3;
12910             }
12911           else if (line[3] == '3'
12912                    && (line[4] == '8' || TOUPPER (line[4]) == 'A')
12913                    && (line[5] == '.' || is_space_char (line[5])))
12914             {
12915               i.insn_opcode_space = line[4] == '8' ? SPACE_0F38 : SPACE_0F3A;
12916               line += 5;
12917             }
12918           break;
12919
12920         case 'M':
                /* Decimal map number without leading zero: 1...15 with EVEX,
                   1...31 otherwise.  */
12921           if (ISDIGIT (line[2]) && line[2] != '0')
12922             {
12923               char *e;
12924               unsigned long n = strtoul (line + 2, &e, 10);
12925
12926               if (n <= (evex ? 15 : 31)
12927                   && (*e == '.' || is_space_char (*e)))
12928                 {
12929                   i.insn_opcode_space = n;
12930                   line = e;
12931                 }
12932             }
12933           break;
12934         }
12935     }
12936
12937   if (line > end && *line == '.' && line[1] == 'W')
12938     {
12939       /* VEX.W, XOP.W, EVEX.W  */
12940       switch (line[2])
12941         {
12942         case '0':
12943           i.tm.opcode_modifier.vexw = VEXW0;
12944           break;
12945
12946         case '1':
12947           i.tm.opcode_modifier.vexw = VEXW1;
12948           break;
12949
12950         case 'I':
12951           if (line[3] == 'G')
12952             {
12953               i.tm.opcode_modifier.vexw = VEXWIG;
                    /* Skip one character here; together with the "line += 3"
                       below this covers all of ".WIG".  */
12954               ++line;
12955             }
12956           break;
12957         }
12958
12959       if (i.tm.opcode_modifier.vexw)
12960         line += 3;
12961     }
12962
        /* With a specifier accepted, anything other than white space (or the
           end of the statement) following what was consumed so far is left
           for "done" to deal with.  */
12963   if (line > end && *line && !is_space_char (*line))
12964     {
12965       /* Improve diagnostic a little.  */
12966       if (*line == '.' && line[1] && !is_space_char (line[1]))
12967         ++line;
12968       goto done;
12969     }
12970
12971   /* Before processing the opcode expression, find trailing "+r" or
12972      "/<digit>" specifiers.  */
12973   for (ptr = line; ; ++ptr)
12974     {
12975       unsigned long n;
12976       char *e;
12977
            /* Only what precedes the first comma is of interest here.  */
12978       ptr = strpbrk (ptr, "+/,");
12979       if (ptr == NULL || *ptr == ',')
12980         break;
12981
12982       if (*ptr == '+' && ptr[1] == 'r'
12983           && (ptr[2] == ',' || (is_space_char (ptr[2]) && ptr[3] == ',')))
12984         {
                /* Blank out the specifier ahead of evaluating the opcode
                   expression.  */
12985           *ptr = ' ';
12986           ptr[1] = ' ';
12987           i.short_form = true;
12988           break;
12989         }
12990
12991       if (*ptr == '/' && ISDIGIT (ptr[1])
12992           && (n = strtoul (ptr + 1, &e, 8)) < 8
12993           && e == ptr + 2
12994           && (ptr[2] == ',' || (is_space_char (ptr[2]) && ptr[3] == ',')))
12995         {
                /* Blank out the specifier ahead of evaluating the opcode
                   expression.  */
12996           *ptr = ' ';
12997           ptr[1] = ' ';
12998           i.tm.extension_opcode = n;
12999           i.tm.opcode_modifier.modrm = 1;
13000           break;
13001         }
13002     }
13003
13004   input_line_pointer = line;
13005   val = get_absolute_expression ();
13006   line = input_line_pointer;
13007
13008   if (i.short_form && (val & 7))
13009     as_warn ("`+r' assumes low three opcode bits to be clear");
13010
        /* Count the significant bytes of VAL (always at least one).  */
13011   for (j = 1; j < sizeof(val); ++j)
13012     if (!(val >> (j * 8)))
13013       break;
13014
13015   /* Trim off a prefix if present.  */
13016   if (j > 1 && !vex && !xop && !evex)
13017     {
13018       uint8_t byte = val >> ((j - 1) * 8);
13019
13020       switch (byte)
13021         {
13022         case DATA_PREFIX_OPCODE:
13023         case REPE_PREFIX_OPCODE:
13024         case REPNE_PREFIX_OPCODE:
13025           if (!add_prefix (byte))
13026             goto bad;
                /* Strip the prefix byte off of VAL, decrementing the byte
                   count at the same time.  */
13027           val &= ((uint64_t)1 << (--j * 8)) - 1;
13028           break;
13029         }
13030     }
13031
13032   /* Parse operands, if any, before evaluating encoding space.  */
13033   if (*line == ',')
13034     {
            /* Mark the memory operand scaling as not specified (yet); code
               below and check_VecOperations() treat values of 32 and up this
               way.  NOTE(review): relies on the field being of unsigned type -
               confirm at its declaration.  */
13035       i.memshift = -1;
13036
13037       ptr = parse_operands (line + 1, &i386_mnemonics[MN__insn]);
13038       this_operand = -1;
13039       if (!ptr)
13040         goto bad;
13041       line = ptr;
13042
13043       if (!i.operands)
13044         {
13045           as_bad (_("expecting operand after ','; got nothing"));
13046           goto done;
13047         }
13048
13049       if (i.mem_operands > 1)
13050         {
13051           as_bad (_("too many memory references for `%s'"),
13052                   &i386_mnemonics[MN__insn]);
13053           goto done;
13054         }
13055
13056       /* No need to distinguish encoding_evex and encoding_evex512.  */
13057       if (i.encoding == encoding_evex512)
13058         i.encoding = encoding_evex;
13059     }
13060
13061   /* Trim off encoding space.  */
13062   if (j > 1 && !i.insn_opcode_space && (val >> ((j - 1) * 8)) == 0x0f)
13063     {
13064       uint8_t byte = val >> ((--j - 1) * 8);
13065
13066       i.insn_opcode_space = SPACE_0F;
            /* The negated condition is either 0 or all ones, i.e. the byte
               following 0x0f is inspected only if j > 1, i.rex2_encoding is
               clear, and i.encoding isn't encoding_egpr (unless an EVEX
               specifier was given).  */
13067       switch (byte & -(j > 1 && !i.rex2_encoding
13068                        && (i.encoding != encoding_egpr || evex)))
13069         {
13070         case 0x38:
13071           i.insn_opcode_space = SPACE_0F38;
13072           --j;
13073           break;
13074         case 0x3a:
13075           i.insn_opcode_space = SPACE_0F3A;
13076           --j;
13077           break;
13078         }
13079       i.tm.opcode_space = i.insn_opcode_space;
13080       val &= ((uint64_t)1 << (j * 8)) - 1;
13081     }
13082   if (!i.tm.opcode_space && (vex || evex))
13083     /* Arrange for build_vex_prefix() to properly emit 0xC4/0xC5.
13084        Also avoid hitting abort() there or in build_evex_prefix().  */
13085     i.tm.opcode_space = i.insn_opcode_space == SPACE_0F ? SPACE_0F
13086                                                    : SPACE_0F38;
13087
        /* No more than two opcode bytes may be left at this point.  */
13088   if (j > 2)
13089     {
13090       as_bad (_("opcode residual (%#"PRIx64") too wide"), (uint64_t) val);
13091       goto done;
13092     }
13093   i.opcode_length = j;
13094
13095   /* Handle operands, if any.  */
13096   if (i.operands)
13097     {
13098       i386_operand_type combined;
13099       expressionS *disp_exp = NULL;
13100       bool changed;
13101
13102       if (i.encoding == encoding_egpr)
13103         {
13104           if (vex || xop)
13105             {
13106               as_bad (_("eGPR use conflicts with encoding specifier"));
13107               goto done;
13108             }
13109           if (evex)
13110             i.encoding = encoding_evex;
13111           else
13112             i.encoding = encoding_default;
13113         }
13114
13115       /* Are we to emit ModR/M encoding?  */
13116       if (!i.short_form
13117           && (i.mem_operands
13118               || i.reg_operands > (i.encoding != encoding_default)
13119               || i.tm.extension_opcode != None))
13120         i.tm.opcode_modifier.modrm = 1;
13121
            /* The "+ 0U" makes the right hand side of the comparison an
               unsigned quantity.  */
13122       if (!i.tm.opcode_modifier.modrm
13123           && (i.reg_operands
13124               > i.short_form + 0U + (i.encoding != encoding_default)
13125               || i.mem_operands))
13126         {
13127           as_bad (_("too many register/memory operands"));
13128           goto done;
13129         }
13130
13131       /* Enforce certain constraints on operands.  */
13132       switch (i.reg_operands + i.mem_operands
13133               + (i.tm.extension_opcode != None))
13134         {
13135         case 0:
13136           if (i.short_form)
13137             {
13138               as_bad (_("too few register/memory operands"));
13139               goto done;
13140             }
13141           /* Fall through.  */
13142         case 1:
13143           if (i.tm.opcode_modifier.modrm)
13144             {
13145               as_bad (_("too few register/memory operands"));
13146               goto done;
13147             }
13148           break;
13149
13150         case 2:
13151           break;
13152
13153         case 4:
13154           if (i.imm_operands
13155               && (i.op[0].imms->X_op != O_constant
13156                   || !fits_in_imm4 (i.op[0].imms->X_add_number)))
13157             {
13158               as_bad (_("constant doesn't fit in %d bits"), evex ? 3 : 4);
13159               goto done;
13160             }
13161           /* Fall through.  */
13162         case 3:
13163           if (i.encoding != encoding_default)
13164             {
13165               i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
13166               break;
13167             }
13168           /* Fall through.  */
13169         default:
13170           as_bad (_("too many register/memory operands"));
13171           goto done;
13172         }
13173
13174       /* Bring operands into canonical order (imm, mem, reg).  */
13175       do
13176         {
13177           changed = false;
13178
13179           for (j = 1; j < i.operands; ++j)
13180             {
13181               if ((!operand_type_check (i.types[j - 1], imm)
13182                    && operand_type_check (i.types[j], imm))
13183                   || (i.types[j - 1].bitfield.class != ClassNone
13184                       && i.types[j].bitfield.class == ClassNone))
13185                 {
13186                   swap_2_operands (j - 1, j);
13187                   changed = true;
13188                 }
13189             }
13190         }
13191       while (changed);
13192
13193       /* For Intel syntax swap the order of register operands.  */
13194       if (intel_syntax)
13195         switch (i.reg_operands)
13196           {
13197           case 0:
13198           case 1:
13199             break;
13200
13201           case 4:
13202             swap_2_operands (i.imm_operands + i.mem_operands + 1, i.operands - 2);
13203             /* Fall through.  */
13204           case 3:
13205           case 2:
13206             swap_2_operands (i.imm_operands + i.mem_operands, i.operands - 1);
13207             break;
13208
13209           default:
13210             abort ();
13211           }
13212
13213       /* Enforce constraints when using VSIB.  */
13214       if (i.index_reg
13215           && (i.index_reg->reg_type.bitfield.xmmword
13216               || i.index_reg->reg_type.bitfield.ymmword
13217               || i.index_reg->reg_type.bitfield.zmmword))
13218         {
13219           if (i.encoding == encoding_default)
13220             {
13221               as_bad (_("VSIB unavailable with legacy encoding"));
13222               goto done;
13223             }
13224
13225           if (i.encoding == encoding_evex
13226               && i.reg_operands > 1)
13227             {
13228               /* We could allow two register operands, encoding the 2nd one in
13229                  an 8-bit immediate like for 4-register-operand insns, but that
13230                  would require ugly fiddling with process_operands() and/or
13231                  build_modrm_byte().  */
13232               as_bad (_("too many register operands with VSIB"));
13233               goto done;
13234             }
13235
13236           i.tm.opcode_modifier.sib = 1;
13237         }
13238
13239       /* Establish operand size encoding.  */
13240       operand_type_set (&combined, 0);
13241
13242       for (j = i.imm_operands; j < i.operands; ++j)
13243         {
13244           /* Look for 8-bit operands that use old registers.  */
13245           if (i.encoding != encoding_default
13246               && flag_code == CODE_64BIT
13247               && i.types[j].bitfield.class == Reg
13248               && i.types[j].bitfield.byte
13249               && !(i.op[j].regs->reg_flags & RegRex64)
13250               && i.op[j].regs->reg_num > 3)
13251             as_bad (_("can't encode register '%s%s' with VEX/XOP/EVEX"),
13252                     register_prefix, i.op[j].regs->reg_name);
13253
13254           i.types[j].bitfield.instance = InstanceNone;
13255
13256           if (operand_type_check (i.types[j], disp))
13257             {
13258               i.types[j].bitfield.baseindex = 1;
13259               disp_exp = i.op[j].disps;
13260             }
13261
13262           if (evex && i.types[j].bitfield.baseindex)
13263             {
13264               unsigned int n = i.memshift;
13265
13266               if (i.types[j].bitfield.byte)
13267                 n = 0;
13268               else if (i.types[j].bitfield.word)
13269                 n = 1;
13270               else if (i.types[j].bitfield.dword)
13271                 n = 2;
13272               else if (i.types[j].bitfield.qword)
13273                 n = 3;
13274               else if (i.types[j].bitfield.xmmword)
13275                 n = 4;
13276               else if (i.types[j].bitfield.ymmword)
13277                 n = 5;
13278               else if (i.types[j].bitfield.zmmword)
13279                 n = 6;
13280
13281               if (i.memshift < 32 && n != i.memshift)
13282                 as_warn ("conflicting memory operand size specifiers");
13283               i.memshift = n;
13284             }
13285
13286           if ((i.broadcast.type || i.broadcast.bytes)
13287               && j == i.broadcast.operand)
13288             continue;
13289
13290           combined = operand_type_or (combined, i.types[j]);
13291           combined.bitfield.class = ClassNone;
13292         }
13293
            /* The value switched on is the broadcast factor (1 if there is
               none) shifted left by the memory operand scaling (0 if not
               specified), i.e. an overall size in bytes.  */
13294       switch ((i.broadcast.type ? i.broadcast.type : 1)
13295               << (i.memshift < 32 ? i.memshift : 0))
13296         {
13297         case 64: combined.bitfield.zmmword = 1; break;
13298         case 32: combined.bitfield.ymmword = 1; break;
13299         case 16: combined.bitfield.xmmword = 1; break;
13300         case  8: combined.bitfield.qword = 1; break;
13301         case  4: combined.bitfield.dword = 1; break;
13302         }
13303
13304       if (i.encoding == encoding_default)
13305         {
13306           if (flag_code == CODE_64BIT && combined.bitfield.qword)
13307             i.rex |= REX_W;
13308           else if ((flag_code == CODE_16BIT ? combined.bitfield.dword
13309                                             : combined.bitfield.word)
13310                    && !add_prefix (DATA_PREFIX_OPCODE))
13311             goto done;
13312         }
13313       else if (!i.tm.opcode_modifier.vexw)
13314         {
13315           if (flag_code == CODE_64BIT)
13316             {
13317               if (combined.bitfield.qword)
13318                 i.tm.opcode_modifier.vexw = VEXW1;
13319               else if (combined.bitfield.dword)
13320                 i.tm.opcode_modifier.vexw = VEXW0;
13321             }
13322
13323           if (!i.tm.opcode_modifier.vexw)
13324             i.tm.opcode_modifier.vexw = VEXWIG;
13325         }
13326
13327       if (vex || xop)
13328         {
13329           if (!i.tm.opcode_modifier.vex)
13330             {
13331               if (combined.bitfield.ymmword)
13332                 i.tm.opcode_modifier.vex = VEX256;
13333               else if (combined.bitfield.xmmword)
13334                 i.tm.opcode_modifier.vex = VEX128;
13335             }
13336         }
13337       else if (evex)
13338         {
13339           if (!i.tm.opcode_modifier.evex)
13340             {
13341               /* Do _not_ consider AVX512VL here.  */
13342               if (i.rounding.type != rc_none || combined.bitfield.zmmword)
13343                 i.tm.opcode_modifier.evex = EVEX512;
13344               else if (combined.bitfield.ymmword)
13345                 i.tm.opcode_modifier.evex = EVEX256;
13346               else if (combined.bitfield.xmmword)
13347                 i.tm.opcode_modifier.evex = EVEX128;
13348             }
13349
13350           if (i.memshift >= 32)
13351             {
13352               unsigned int n = 0;
13353
13354               switch (i.tm.opcode_modifier.evex)
13355                 {
13356                 case EVEX512: n = 64; break;
13357                 case EVEX256: n = 32; break;
13358                 case EVEX128: n = 16; break;
13359                 }
13360
13361               if (i.broadcast.type)
13362                 n /= i.broadcast.type;
13363
                    /* With a size known, the scaling is the number of trailing
                       zero bits of N.  */
13364               if (n > 0)
13365                 for (i.memshift = 0; !(n & 1); n >>= 1)
13366                   ++i.memshift;
13367               else if (disp_exp != NULL && disp_exp->X_op == O_constant
13368                        && disp_exp->X_add_number != 0
13369                        && i.disp_encoding != disp_encoding_32bit)
13370                 {
13371                   if (!quiet_warnings)
13372                     as_warn ("cannot determine memory operand size");
13373                   i.disp_encoding = disp_encoding_32bit;
13374                 }
13375             }
13376         }
13377
            /* A scaling still not specified becomes 0.  One which is set is
               acceptable only with EVEX; otherwise encoding_error leads to the
               "conflicting .insn operands" diagnostic further down.  */
13378       if (i.memshift >= 32)
13379         i.memshift = 0;
13380       else if (!evex)
13381         i.encoding = encoding_error;
13382
13383       if (i.disp_operands && !optimize_disp (&i.tm))
13384         goto done;
13385
13386       /* Establish size for immediate operands.  */
13387       for (j = 0; j < i.imm_operands; ++j)
13388         {
13389           expressionS *expP = i.op[j].imms;
13390
13391           gas_assert (operand_type_check (i.types[j], imm));
13392           operand_type_set (&i.types[j], 0);
13393
13394           if (i.imm_bits[j] > 32)
13395             i.types[j].bitfield.imm64 = 1;
13396           else if (i.imm_bits[j] > 16)
13397             {
13398               if (flag_code == CODE_64BIT && (i.flags[j] & Operand_Signed))
13399                 i.types[j].bitfield.imm32s = 1;
13400               else
13401                 i.types[j].bitfield.imm32 = 1;
13402             }
13403           else if (i.imm_bits[j] > 8)
13404             i.types[j].bitfield.imm16 = 1;
13405           else if (i.imm_bits[j] > 0)
13406             {
13407               if (i.flags[j] & Operand_Signed)
13408                 i.types[j].bitfield.imm8s = 1;
13409               else
13410                 i.types[j].bitfield.imm8 = 1;
13411             }
13412           else if (expP->X_op == O_constant)
13413             {
13414               i.types[j] = smallest_imm_type (expP->X_add_number);
13415               i.types[j].bitfield.imm1 = 0;
13416               /* Oddly enough imm_size() checks imm64 first, so the bit needs
13417                  zapping since smallest_imm_type() sets it unconditionally.  */
13418               if (flag_code != CODE_64BIT)
13419                 {
13420                   i.types[j].bitfield.imm64 = 0;
13421                   i.types[j].bitfield.imm32s = 0;
13422                   i.types[j].bitfield.imm32 = 1;
13423                 }
13424               else if (i.types[j].bitfield.imm32 || i.types[j].bitfield.imm32s)
13425                 i.types[j].bitfield.imm64 = 0;
13426             }
13427           else
13428             /* Non-constant expressions are sized heuristically.  */
13429             switch (flag_code)
13430               {
13431               case CODE_64BIT: i.types[j].bitfield.imm32s = 1; break;
13432               case CODE_32BIT: i.types[j].bitfield.imm32 = 1; break;
13433               case CODE_16BIT: i.types[j].bitfield.imm16 = 1; break;
13434               }
13435         }
13436
13437       for (j = 0; j < i.operands; ++j)
13438         i.tm.operand_types[j] = i.types[j];
13439
13440       process_operands ();
13441     }
13442
13443   /* Don't set opcode until after processing operands, to avoid any
13444      potential special casing there.  */
13445   i.tm.base_opcode |= val;
13446
        /* Without EVEX encoding neither broadcast nor rounding control nor
           masking may have been specified.  With EVEX encoding rounding
           control may not be combined with a memory operand, and broadcast
           requires the respective operand to be flagged Operand_Mem.  */
13447   if (i.encoding == encoding_error
13448       || (i.encoding != encoding_evex
13449           ? i.broadcast.type || i.broadcast.bytes
13450             || i.rounding.type != rc_none
13451             || i.mask.reg
13452           : (i.mem_operands && i.rounding.type != rc_none)
13453             || ((i.broadcast.type || i.broadcast.bytes)
13454                 && !(i.flags[i.broadcast.operand] & Operand_Mem))))
13455     {
13456       as_bad (_("conflicting .insn operands"));
13457       goto done;
13458     }
13459
13460   if (vex || xop)
13461     {
13462       if (!i.tm.opcode_modifier.vex)
13463         i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
13464
13465       build_vex_prefix (NULL);
13466       i.rex &= REX_OPCODE;
13467     }
13468   else if (evex)
13469     {
13470       if (!i.tm.opcode_modifier.evex)
13471         i.tm.opcode_modifier.evex = EVEXLIG;
13472
13473       build_evex_prefix ();
13474       i.rex &= REX_OPCODE;
13475     }
13476   else
13477     establish_rex ();
13478
13479   last_insn = &seg_info(now_seg)->tc_segment_info_data.last_insn;
13480   output_insn (last_insn);
13481   last_insn->kind = last_insn_directive;
13482   last_insn->name = ".insn directive";
13483   last_insn->file = as_where (&last_insn->line);
13484
13485 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
13486   /* PS: SCFI is enabled only for System V AMD64 ABI.  The ABI check has been
13487      performed in i386_target_format.  */
13488   if (IS_ELF && flag_synth_cfi)
13489     as_bad (_("SCFI: hand-crafting instructions not supported"));
13490 #endif
13491
13492  done:
13493   *saved_ilp = saved_char;
13494   input_line_pointer = line;
13495
13496   demand_empty_rest_of_line ();
13497
13498   /* Make sure dot_insn() won't yield "true" anymore.  */
13499   i.tm.mnem_off = 0;
13500 }
13501
13502 #ifdef TE_PE
13503 static void
13504 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
13505 {
13506   expressionS value;
13507
13508   /* Emit each listed expression with size 4; O_symbol becomes O_secrel.  */
13509   for (;;)
13510     {
13511       expression (&value);
13512       if (value.X_op == O_symbol)
13513         value.X_op = O_secrel;
13514       emit_expr (&value, 4);
13515       if (*input_line_pointer != ',')
13516         break;
13517       ++input_line_pointer;
13518     }
13519   demand_empty_rest_of_line ();
13520 }
13521
13522 static void
13523 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
13524 {
13525   expressionS value;
13526
13527   /* Emit each listed expression with size 2; O_symbol becomes O_secidx.  */
13528   for (;;)
13529     {
13530       expression (&value);
13531       if (value.X_op == O_symbol)
13532         value.X_op = O_secidx;
13533       emit_expr (&value, 2);
13534       if (*input_line_pointer != ',')
13535         break;
13536       ++input_line_pointer;
13537     }
13538   demand_empty_rest_of_line ();
13539 }
13540 #endif
13541
13542 /* Recognize a Rounding Control / SAE specifier at the start of PSTR, by
13543    prefix-matching PSTR against the names in RC_NamesTable[].  On success
13544    the rounding type is recorded in i.rounding.type, an i.encoding of
13545    encoding_default or encoding_egpr is changed to encoding_evex512, and a
13546    pointer to the character following the specifier is returned.  NULL is
13547    returned when nothing matched, when a rounding type was recorded before
13548    (a diagnostic is issued in this case), or when i.encoding is anything
13549    other than the default, eGPR, or EVEX ones.  */
13550
13551 static char *
13552 RC_SAE_specifier (const char *pstr)
13553 {
13554   unsigned int idx;
13555
13556   for (idx = 0; idx < ARRAY_SIZE (RC_NamesTable); ++idx)
13557     {
13558       if (strncmp (pstr, RC_NamesTable[idx].name, RC_NamesTable[idx].len))
13559         continue;
13560
13561       if (i.rounding.type != rc_none)
13562         {
13563           as_bad (_("duplicated `{%s}'"), RC_NamesTable[idx].name);
13564           return NULL;
13565         }
13566
13567       if (i.encoding == encoding_default || i.encoding == encoding_egpr)
13568         i.encoding = encoding_evex512;
13569       else if (i.encoding != encoding_evex
13570                && i.encoding != encoding_evex512)
13571         return NULL;
13572
13573       i.rounding.type = RC_NamesTable[idx].type;
13574       return (char *) (pstr + RC_NamesTable[idx].len);
13575     }
13576
13577   /* No table entry matched.  */
13578   return NULL;
13579 }
13580
13581 /* Handle Vector operations.  */
13582
13583 static char *
13584 check_VecOperations (char *op_string)
13585 {
13586   const reg_entry *mask;
13587   const char *saved;
13588   char *end_op;
13589
13590   while (*op_string)
13591     {
13592       saved = op_string;
13593       if (*op_string == '{')
13594         {
13595           op_string++;
13596
13597           /* Check broadcasts.  */
13598           if (startswith (op_string, "1to"))
13599             {
13600               unsigned int bcst_type;
13601
13602               if (i.broadcast.type)
13603                 goto duplicated_vec_op;
13604
13605               op_string += 3;
13606               if (*op_string == '8')
13607                 bcst_type = 8;
13608               else if (*op_string == '4')
13609                 bcst_type = 4;
13610               else if (*op_string == '2')
13611                 bcst_type = 2;
13612               else if (*op_string == '1'
13613                        && *(op_string+1) == '6')
13614                 {
13615                   bcst_type = 16;
13616                   op_string++;
13617                 }
13618               else if (*op_string == '3'
13619                        && *(op_string+1) == '2')
13620                 {
13621                   bcst_type = 32;
13622                   op_string++;
13623                 }
13624               else
13625                 {
13626                   as_bad (_("Unsupported broadcast: `%s'"), saved);
13627                   return NULL;
13628                 }
13629               op_string++;
13630
13631               switch (i.encoding)
13632                 {
13633                 case encoding_default:
13634                 case encoding_egpr:
13635                   i.encoding = encoding_evex;
13636                   break;
13637                 case encoding_evex:
13638                 case encoding_evex512:
13639                   break;
13640                 default:
13641                   goto unknown_vec_op;
13642                 }
13643
13644               i.broadcast.type = bcst_type;
13645               i.broadcast.operand = this_operand;
13646
13647               /* For .insn a data size specifier may be appended.  */
13648               if (dot_insn () && *op_string == ':')
13649                 goto dot_insn_modifier;
13650             }
13651           /* Check .insn special cases.  */
13652           else if (dot_insn () && *op_string == ':')
13653             {
13654             dot_insn_modifier:
13655               switch (op_string[1])
13656                 {
13657                   unsigned long n;
13658
13659                 case 'd':
13660                   if (i.memshift < 32)
13661                     goto duplicated_vec_op;
13662
13663                   n = strtoul (op_string + 2, &end_op, 0);
13664                   if (n)
13665                     for (i.memshift = 0; !(n & 1); n >>= 1)
13666                       ++i.memshift;
13667                   if (i.memshift < 32 && n == 1)
13668                     op_string = end_op;
13669                   break;
13670
13671                 case 's': case 'u':
13672                   /* This isn't really a "vector" operation, but a sign/size
13673                      specifier for immediate operands of .insn.  Note that AT&T
13674                      syntax handles the same in i386_immediate().  */
13675                   if (!intel_syntax)
13676                     break;
13677
13678                   if (i.imm_bits[this_operand])
13679                     goto duplicated_vec_op;
13680
13681                   n = strtoul (op_string + 2, &end_op, 0);
13682                   if (n && n <= (flag_code == CODE_64BIT ? 64 : 32))
13683                     {
13684                       i.imm_bits[this_operand] = n;
13685                       if (op_string[1] == 's')
13686                         i.flags[this_operand] |= Operand_Signed;
13687                       op_string = end_op;
13688                     }
13689                   break;
13690                 }
13691             }
13692           /* Check masking operation.  */
13693           else if ((mask = parse_register (op_string, &end_op)) != NULL)
13694             {
13695               if (mask == &bad_reg)
13696                 return NULL;
13697
13698               /* k0 can't be used for write mask.  */
13699               if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
13700                 {
13701                   as_bad (_("`%s%s' can't be used for write mask"),
13702                           register_prefix, mask->reg_name);
13703                   return NULL;
13704                 }
13705
13706               if (!i.mask.reg)
13707                 {
13708                   i.mask.reg = mask;
13709                   i.mask.operand = this_operand;
13710                 }
13711               else if (i.mask.reg->reg_num)
13712                 goto duplicated_vec_op;
13713               else
13714                 {
13715                   i.mask.reg = mask;
13716
13717                   /* Only "{z}" is allowed here.  No need to check
13718                      zeroing mask explicitly.  */
13719                   if (i.mask.operand != (unsigned int) this_operand)
13720                     {
13721                       as_bad (_("invalid write mask `%s'"), saved);
13722                       return NULL;
13723                     }
13724                 }
13725
13726               op_string = end_op;
13727             }
13728           /* Check zeroing-flag for masking operation.  */
13729           else if (*op_string == 'z')
13730             {
13731               if (!i.mask.reg)
13732                 {
13733                   i.mask.reg = reg_k0;
13734                   i.mask.zeroing = 1;
13735                   i.mask.operand = this_operand;
13736                 }
13737               else
13738                 {
13739                   if (i.mask.zeroing)
13740                     {
13741                     duplicated_vec_op:
13742                       as_bad (_("duplicated `%s'"), saved);
13743                       return NULL;
13744                     }
13745
13746                   i.mask.zeroing = 1;
13747
13748                   /* Only "{%k}" is allowed here.  No need to check mask
13749                      register explicitly.  */
13750                   if (i.mask.operand != (unsigned int) this_operand)
13751                     {
13752                       as_bad (_("invalid zeroing-masking `%s'"),
13753                               saved);
13754                       return NULL;
13755                     }
13756                 }
13757
13758               op_string++;
13759             }
13760           else if (intel_syntax
13761                    && (op_string = RC_SAE_specifier (op_string)) != NULL)
13762             i.rounding.modifier = true;
13763           else
13764             goto unknown_vec_op;
13765
13766           if (*op_string != '}')
13767             {
13768               as_bad (_("missing `}' in `%s'"), saved);
13769               return NULL;
13770             }
13771           op_string++;
13772
13773           /* Strip whitespace since the addition of pseudo prefixes
13774              changed how the scrubber treats '{'.  */
13775           if (is_space_char (*op_string))
13776             ++op_string;
13777
13778           continue;
13779         }
13780     unknown_vec_op:
13781       /* We don't know this one.  */
13782       as_bad (_("unknown vector operation: `%s'"), saved);
13783       return NULL;
13784     }
13785
13786   if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
13787     {
13788       as_bad (_("zeroing-masking only allowed with write mask"));
13789       return NULL;
13790     }
13791
13792   return op_string;
13793 }
13794
13795 static int
13796 i386_immediate (char *imm_start)
13797 {
13798   char *save_input_line_pointer;
13799   char *gotfree_input_line;
13800   segT exp_seg = 0;
13801   expressionS *exp;
13802   i386_operand_type types;
13803
13804   operand_type_set (&types, ~0);
13805
13806   if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
13807     {
13808       as_bad (_("at most %d immediate operands are allowed"),
13809               MAX_IMMEDIATE_OPERANDS);
13810       return 0;
13811     }
13812
13813   exp = &im_expressions[i.imm_operands++];
13814   i.op[this_operand].imms = exp;
13815
13816   if (is_space_char (*imm_start))
13817     ++imm_start;
13818
13819   save_input_line_pointer = input_line_pointer;
13820   input_line_pointer = imm_start;
13821
13822   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
13823   if (gotfree_input_line)
13824     input_line_pointer = gotfree_input_line;
13825
13826   expr_mode = expr_operator_none;
13827   exp_seg = expression (exp);
13828
13829   /* For .insn immediates there may be a size specifier.  */
13830   if (dot_insn () && *input_line_pointer == '{' && input_line_pointer[1] == ':'
13831       && (input_line_pointer[2] == 's' || input_line_pointer[2] == 'u'))
13832     {
13833       char *e;
13834       unsigned long n = strtoul (input_line_pointer + 3, &e, 0);
13835
13836       if (*e == '}' && n && n <= (flag_code == CODE_64BIT ? 64 : 32))
13837         {
13838           i.imm_bits[this_operand] = n;
13839           if (input_line_pointer[2] == 's')
13840             i.flags[this_operand] |= Operand_Signed;
13841           input_line_pointer = e + 1;
13842         }
13843     }
13844
13845   SKIP_WHITESPACE ();
13846   if (*input_line_pointer)
13847     as_bad (_("junk `%s' after expression"), input_line_pointer);
13848
13849   input_line_pointer = save_input_line_pointer;
13850   if (gotfree_input_line)
13851     {
13852       free (gotfree_input_line);
13853
13854       if (exp->X_op == O_constant)
13855         exp->X_op = O_illegal;
13856     }
13857
13858   if (exp_seg == reg_section)
13859     {
13860       as_bad (_("illegal immediate register operand %s"), imm_start);
13861       return 0;
13862     }
13863
13864   return i386_finalize_immediate (exp_seg, exp, types, imm_start);
13865 }
13866
13867 static int
13868 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
13869                          i386_operand_type types, const char *imm_start)
13870 {
13871   if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
13872     {
13873       if (imm_start)
13874         as_bad (_("missing or invalid immediate expression `%s'"),
13875                 imm_start);
13876       return 0;
13877     }
13878   else if (exp->X_op == O_constant)
13879     {
13880       /* Size it properly later.  */
13881       i.types[this_operand].bitfield.imm64 = 1;
13882
13883       /* If not 64bit, sign/zero extend val, to account for wraparound
13884          when !BFD64.  */
13885       if (expr_mode == expr_operator_present
13886           && flag_code != CODE_64BIT && !object_64bit)
13887         exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
13888     }
13889 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
13890   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
13891            && exp_seg != absolute_section
13892            && exp_seg != text_section
13893            && exp_seg != data_section
13894            && exp_seg != bss_section
13895            && exp_seg != undefined_section
13896            && !bfd_is_com_section (exp_seg))
13897     {
13898       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
13899       return 0;
13900     }
13901 #endif
13902   else
13903     {
13904       /* This is an address.  The size of the address will be
13905          determined later, depending on destination register,
13906          suffix, or the default for the section.  */
13907       i.types[this_operand].bitfield.imm8 = 1;
13908       i.types[this_operand].bitfield.imm16 = 1;
13909       i.types[this_operand].bitfield.imm32 = 1;
13910       i.types[this_operand].bitfield.imm32s = 1;
13911       i.types[this_operand].bitfield.imm64 = 1;
13912       i.types[this_operand] = operand_type_and (i.types[this_operand],
13913                                                 types);
13914     }
13915
13916   return 1;
13917 }
13918
13919 static char *
13920 i386_scale (char *scale)
13921 {
13922   offsetT val;
13923   char *save = input_line_pointer;
13924
13925   input_line_pointer = scale;
13926   val = get_absolute_expression ();
13927
13928   switch (val)
13929     {
13930     case 1:
13931       i.log2_scale_factor = 0;
13932       break;
13933     case 2:
13934       i.log2_scale_factor = 1;
13935       break;
13936     case 4:
13937       i.log2_scale_factor = 2;
13938       break;
13939     case 8:
13940       i.log2_scale_factor = 3;
13941       break;
13942     default:
13943       {
13944         char sep = *input_line_pointer;
13945
13946         *input_line_pointer = '\0';
13947         as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
13948                 scale);
13949         *input_line_pointer = sep;
13950         input_line_pointer = save;
13951         return NULL;
13952       }
13953     }
13954   if (i.log2_scale_factor != 0 && i.index_reg == 0)
13955     {
13956       as_warn (_("scale factor of %d without an index register"),
13957                1 << i.log2_scale_factor);
13958       i.log2_scale_factor = 0;
13959     }
13960   scale = input_line_pointer;
13961   input_line_pointer = save;
13962   return scale;
13963 }
13964
13965 static int
13966 i386_displacement (char *disp_start, char *disp_end)
13967 {
13968   expressionS *exp;
13969   segT exp_seg = 0;
13970   char *save_input_line_pointer;
13971   char *gotfree_input_line;
13972   int override;
13973   i386_operand_type bigdisp, types = anydisp;
13974   int ret;
13975
13976   if (i.disp_operands == MAX_MEMORY_OPERANDS)
13977     {
13978       as_bad (_("at most %d displacement operands are allowed"),
13979               MAX_MEMORY_OPERANDS);
13980       return 0;
13981     }
13982
13983   operand_type_set (&bigdisp, 0);
13984   if (i.jumpabsolute
13985       || i.types[this_operand].bitfield.baseindex
13986       || (current_templates.start->opcode_modifier.jump != JUMP
13987           && current_templates.start->opcode_modifier.jump != JUMP_DWORD))
13988     {
13989       i386_addressing_mode ();
13990       override = (i.prefix[ADDR_PREFIX] != 0);
13991       if (flag_code == CODE_64BIT)
13992         {
13993           bigdisp.bitfield.disp32 = 1;
13994           if (!override)
13995             bigdisp.bitfield.disp64 = 1;
13996         }
13997       else if ((flag_code == CODE_16BIT) ^ override)
13998           bigdisp.bitfield.disp16 = 1;
13999       else
14000           bigdisp.bitfield.disp32 = 1;
14001     }
14002   else
14003     {
14004       /* For PC-relative branches, the width of the displacement may be
14005          dependent upon data size, but is never dependent upon address size.
14006          Also make sure to not unintentionally match against a non-PC-relative
14007          branch template.  */
14008       const insn_template *t = current_templates.start;
14009       bool has_intel64 = false;
14010
14011       while (++t < current_templates.end)
14012         {
14013           if (t->opcode_modifier.jump
14014               != current_templates.start->opcode_modifier.jump)
14015             break;
14016           if ((t->opcode_modifier.isa64 >= INTEL64))
14017             has_intel64 = true;
14018         }
14019       current_templates.end = t;
14020
14021       override = (i.prefix[DATA_PREFIX] != 0);
14022       if (flag_code == CODE_64BIT)
14023         {
14024           if ((override || i.suffix == WORD_MNEM_SUFFIX)
14025               && (!intel64 || !has_intel64))
14026             bigdisp.bitfield.disp16 = 1;
14027           else
14028             bigdisp.bitfield.disp32 = 1;
14029         }
14030       else
14031         {
14032           if (!override)
14033             override = (i.suffix == (flag_code != CODE_16BIT
14034                                      ? WORD_MNEM_SUFFIX
14035                                      : LONG_MNEM_SUFFIX));
14036           bigdisp.bitfield.disp32 = 1;
14037           if ((flag_code == CODE_16BIT) ^ override)
14038             {
14039               bigdisp.bitfield.disp32 = 0;
14040               bigdisp.bitfield.disp16 = 1;
14041             }
14042         }
14043     }
14044   i.types[this_operand] = operand_type_or (i.types[this_operand],
14045                                            bigdisp);
14046
14047   exp = &disp_expressions[i.disp_operands];
14048   i.op[this_operand].disps = exp;
14049   i.disp_operands++;
14050   save_input_line_pointer = input_line_pointer;
14051   input_line_pointer = disp_start;
14052   END_STRING_AND_SAVE (disp_end);
14053
14054 #ifndef GCC_ASM_O_HACK
14055 #define GCC_ASM_O_HACK 0
14056 #endif
14057 #if GCC_ASM_O_HACK
14058   END_STRING_AND_SAVE (disp_end + 1);
14059   if (i.types[this_operand].bitfield.baseIndex
14060       && displacement_string_end[-1] == '+')
14061     {
14062       /* This hack is to avoid a warning when using the "o"
14063          constraint within gcc asm statements.
14064          For instance:
14065
14066          #define _set_tssldt_desc(n,addr,limit,type) \
14067          __asm__ __volatile__ ( \
14068          "movw %w2,%0\n\t" \
14069          "movw %w1,2+%0\n\t" \
14070          "rorl $16,%1\n\t" \
14071          "movb %b1,4+%0\n\t" \
14072          "movb %4,5+%0\n\t" \
14073          "movb $0,6+%0\n\t" \
14074          "movb %h1,7+%0\n\t" \
14075          "rorl $16,%1" \
14076          : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
14077
14078          This works great except that the output assembler ends
14079          up looking a bit weird if it turns out that there is
14080          no offset.  You end up producing code that looks like:
14081
14082          #APP
14083          movw $235,(%eax)
14084          movw %dx,2+(%eax)
14085          rorl $16,%edx
14086          movb %dl,4+(%eax)
14087          movb $137,5+(%eax)
14088          movb $0,6+(%eax)
14089          movb %dh,7+(%eax)
14090          rorl $16,%edx
14091          #NO_APP
14092
14093          So here we provide the missing zero.  */
14094
14095       *displacement_string_end = '0';
14096     }
14097 #endif
14098   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
14099   if (gotfree_input_line)
14100     input_line_pointer = gotfree_input_line;
14101
14102   expr_mode = expr_operator_none;
14103   exp_seg = expression (exp);
14104
14105   SKIP_WHITESPACE ();
14106   if (*input_line_pointer)
14107     as_bad (_("junk `%s' after expression"), input_line_pointer);
14108 #if GCC_ASM_O_HACK
14109   RESTORE_END_STRING (disp_end + 1);
14110 #endif
14111   input_line_pointer = save_input_line_pointer;
14112   if (gotfree_input_line)
14113     {
14114       free (gotfree_input_line);
14115
14116       if (exp->X_op == O_constant || exp->X_op == O_register)
14117         exp->X_op = O_illegal;
14118     }
14119
14120   ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
14121
14122   RESTORE_END_STRING (disp_end);
14123
14124   return ret;
14125 }
14126
14127 static int
14128 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
14129                             i386_operand_type types, const char *disp_start)
14130 {
14131   int ret = 1;
14132
14133   /* We do this to make sure that the section symbol is in
14134      the symbol table.  We will ultimately change the relocation
14135      to be relative to the beginning of the section.  */
14136   if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
14137       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
14138       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
14139     {
14140       if (exp->X_op != O_symbol)
14141         goto inv_disp;
14142
14143       if (S_IS_LOCAL (exp->X_add_symbol)
14144           && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
14145           && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
14146         section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
14147       exp->X_op = O_subtract;
14148       exp->X_op_symbol = GOT_symbol;
14149       if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
14150         i.reloc[this_operand] = BFD_RELOC_32_PCREL;
14151       else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
14152         i.reloc[this_operand] = BFD_RELOC_64;
14153       else
14154         i.reloc[this_operand] = BFD_RELOC_32;
14155     }
14156
14157   else if (exp->X_op == O_absent
14158            || exp->X_op == O_illegal
14159            || exp->X_op == O_big)
14160     {
14161     inv_disp:
14162       as_bad (_("missing or invalid displacement expression `%s'"),
14163               disp_start);
14164       ret = 0;
14165     }
14166
14167   else if (exp->X_op == O_constant)
14168     {
14169       /* Sizing gets taken care of by optimize_disp().
14170
14171          If not 64bit, sign/zero extend val, to account for wraparound
14172          when !BFD64.  */
14173       if (expr_mode == expr_operator_present
14174           && flag_code != CODE_64BIT && !object_64bit)
14175         exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
14176     }
14177
14178 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
14179   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
14180            && exp_seg != absolute_section
14181            && exp_seg != text_section
14182            && exp_seg != data_section
14183            && exp_seg != bss_section
14184            && exp_seg != undefined_section
14185            && !bfd_is_com_section (exp_seg))
14186     {
14187       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
14188       ret = 0;
14189     }
14190 #endif
14191
14192   else if (current_templates.start->opcode_modifier.jump == JUMP_BYTE)
14193     i.types[this_operand].bitfield.disp8 = 1;
14194
14195   /* Check if this is a displacement only operand.  */
14196   if (!i.types[this_operand].bitfield.baseindex)
14197     i.types[this_operand] =
14198       operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
14199                        operand_type_and (i.types[this_operand], types));
14200
14201   return ret;
14202 }
14203
14204 /* Return the active addressing mode, taking address override and
14205    registers forming the address into consideration.  Update the
14206    address override prefix if necessary.  */
14207
14208 static enum flag_code
14209 i386_addressing_mode (void)
14210 {
14211   enum flag_code addr_mode;
14212
14213   if (i.prefix[ADDR_PREFIX])
14214     addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
14215   else if (flag_code == CODE_16BIT
14216            && is_cpu (current_templates.start, CpuMPX)
14217            /* Avoid replacing the "16-bit addressing not allowed" diagnostic
14218               from md_assemble() by "is not a valid base/index expression"
14219               when there is a base and/or index.  */
14220            && !i.types[this_operand].bitfield.baseindex)
14221     {
14222       /* MPX insn memory operands with neither base nor index must be forced
14223          to use 32-bit addressing in 16-bit mode.  */
14224       addr_mode = CODE_32BIT;
14225       i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
14226       ++i.prefixes;
14227       gas_assert (!i.types[this_operand].bitfield.disp16);
14228       gas_assert (!i.types[this_operand].bitfield.disp32);
14229     }
14230   else
14231     {
14232       addr_mode = flag_code;
14233
14234 #if INFER_ADDR_PREFIX
14235       if (i.mem_operands == 0)
14236         {
14237           /* Infer address prefix from the first memory operand.  */
14238           const reg_entry *addr_reg = i.base_reg;
14239
14240           if (addr_reg == NULL)
14241             addr_reg = i.index_reg;
14242
14243           if (addr_reg)
14244             {
14245               if (addr_reg->reg_type.bitfield.dword)
14246                 addr_mode = CODE_32BIT;
14247               else if (flag_code != CODE_64BIT
14248                        && addr_reg->reg_type.bitfield.word)
14249                 addr_mode = CODE_16BIT;
14250
14251               if (addr_mode != flag_code)
14252                 {
14253                   i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
14254                   i.prefixes += 1;
14255                   /* Change the size of any displacement too.  At most one
14256                      of Disp16 or Disp32 is set.
14257                      FIXME.  There doesn't seem to be any real need for
14258                      separate Disp16 and Disp32 flags.  The same goes for
14259                      Imm16 and Imm32.  Removing them would probably clean
14260                      up the code quite a lot.  */
14261                   if (flag_code != CODE_64BIT
14262                       && (i.types[this_operand].bitfield.disp16
14263                           || i.types[this_operand].bitfield.disp32))
14264                     {
14265                       static const i386_operand_type disp16_32 = {
14266                         .bitfield = { .disp16 = 1, .disp32 = 1 }
14267                       };
14268
14269                       i.types[this_operand]
14270                         = operand_type_xor (i.types[this_operand], disp16_32);
14271                     }
14272                 }
14273             }
14274         }
14275 #endif
14276     }
14277
14278   return addr_mode;
14279 }
14280
14281 /* Make sure the memory operand we've been dealt is valid.
14282    Return 1 on success, 0 on a failure.  */
14283
14284 static int
14285 i386_index_check (const char *operand_string)
14286 {
14287   const char *kind = "base/index";
14288   enum flag_code addr_mode = i386_addressing_mode ();
14289   const insn_template *t = current_templates.end - 1;
14290
14291   if (t->opcode_modifier.isstring)
14292     {
14293       /* Memory operands of string insns are special in that they only allow
14294          a single register (rDI, rSI, or rBX) as their memory address.  */
14295       const reg_entry *expected_reg;
14296       static const char di_si[][2][4] =
14297         {
14298           { "esi", "edi" },
14299           { "si", "di" },
14300           { "rsi", "rdi" }
14301         };
14302       static const char bx[][4] = { "ebx", "bx", "rbx" };
14303
14304       kind = "string address";
14305
14306       if (t->opcode_modifier.prefixok == PrefixRep)
14307         {
14308           int es_op = t->opcode_modifier.isstring - IS_STRING_ES_OP0;
14309           int op = 0;
14310
14311           if (!t->operand_types[0].bitfield.baseindex
14312               || ((!i.mem_operands != !intel_syntax)
14313                   && t->operand_types[1].bitfield.baseindex))
14314             op = 1;
14315           expected_reg
14316             = (const reg_entry *) str_hash_find (reg_hash,
14317                                                  di_si[addr_mode][op == es_op]);
14318         }
14319       else
14320         expected_reg
14321           = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
14322
14323       if (i.base_reg != expected_reg
14324           || i.index_reg
14325           || operand_type_check (i.types[this_operand], disp))
14326         {
14327           /* The second memory operand must have the same size as
14328              the first one.  */
14329           if (i.mem_operands
14330               && i.base_reg
14331               && !((addr_mode == CODE_64BIT
14332                     && i.base_reg->reg_type.bitfield.qword)
14333                    || (addr_mode == CODE_32BIT
14334                        ? i.base_reg->reg_type.bitfield.dword
14335                        : i.base_reg->reg_type.bitfield.word)))
14336             goto bad_address;
14337
14338           as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
14339                    operand_string,
14340                    intel_syntax ? '[' : '(',
14341                    register_prefix,
14342                    expected_reg->reg_name,
14343                    intel_syntax ? ']' : ')');
14344           return 1;
14345         }
14346       else
14347         return 1;
14348
14349     bad_address:
14350       as_bad (_("`%s' is not a valid %s expression"),
14351               operand_string, kind);
14352       return 0;
14353     }
14354   else
14355     {
14356       t = current_templates.start;
14357
14358       if (addr_mode != CODE_16BIT)
14359         {
14360           /* 32-bit/64-bit checks.  */
14361           if (i.disp_encoding == disp_encoding_16bit)
14362             {
14363             bad_disp:
14364               as_bad (_("invalid `%s' prefix"),
14365                       addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
14366               return 0;
14367             }
14368
14369           if ((i.base_reg
14370                && ((addr_mode == CODE_64BIT
14371                     ? !i.base_reg->reg_type.bitfield.qword
14372                     : !i.base_reg->reg_type.bitfield.dword)
14373                    || (i.index_reg && i.base_reg->reg_num == RegIP)
14374                    || i.base_reg->reg_num == RegIZ))
14375               || (i.index_reg
14376                   && !i.index_reg->reg_type.bitfield.xmmword
14377                   && !i.index_reg->reg_type.bitfield.ymmword
14378                   && !i.index_reg->reg_type.bitfield.zmmword
14379                   && ((addr_mode == CODE_64BIT
14380                        ? !i.index_reg->reg_type.bitfield.qword
14381                        : !i.index_reg->reg_type.bitfield.dword)
14382                       || !i.index_reg->reg_type.bitfield.baseindex)))
14383             goto bad_address;
14384
14385           /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
14386           if (t->mnem_off == MN_bndmk
14387               || t->mnem_off == MN_bndldx
14388               || t->mnem_off == MN_bndstx
14389               || t->opcode_modifier.sib == SIBMEM)
14390             {
14391               /* They cannot use RIP-relative addressing. */
14392               if (i.base_reg && i.base_reg->reg_num == RegIP)
14393                 {
14394                   as_bad (_("`%s' cannot be used here"), operand_string);
14395                   return 0;
14396                 }
14397
14398               /* bndldx and bndstx ignore their scale factor. */
14399               if ((t->mnem_off == MN_bndldx || t->mnem_off == MN_bndstx)
14400                   && i.log2_scale_factor)
14401                 as_warn (_("register scaling is being ignored here"));
14402             }
14403         }
14404       else
14405         {
14406           /* 16-bit checks.  */
14407           if (i.disp_encoding == disp_encoding_32bit)
14408             goto bad_disp;
14409
14410           if ((i.base_reg
14411                && (!i.base_reg->reg_type.bitfield.word
14412                    || !i.base_reg->reg_type.bitfield.baseindex))
14413               || (i.index_reg
14414                   && (!i.index_reg->reg_type.bitfield.word
14415                       || !i.index_reg->reg_type.bitfield.baseindex
14416                       || !(i.base_reg
14417                            && i.base_reg->reg_num < 6
14418                            && i.index_reg->reg_num >= 6
14419                            && i.log2_scale_factor == 0))))
14420             goto bad_address;
14421         }
14422     }
14423   return 1;
14424 }
14425
14426 /* Handle vector immediates.  */
14427
14428 static int
14429 RC_SAE_immediate (const char *imm_start)
14430 {
14431   const char *pstr = imm_start;
14432
14433   if (*pstr != '{')
14434     return 0;
14435
14436   pstr = RC_SAE_specifier (pstr + 1);
14437   if (pstr == NULL)
14438     return 0;
14439
14440   if (*pstr++ != '}')
14441     {
14442       as_bad (_("Missing '}': '%s'"), imm_start);
14443       return 0;
14444     }
14445   /* RC/SAE immediate string should contain nothing more.  */;
14446   if (*pstr != 0)
14447     {
14448       as_bad (_("Junk after '}': '%s'"), imm_start);
14449       return 0;
14450     }
14451
14452   /* Internally this doesn't count as an operand.  */
14453   --i.operands;
14454
14455   return 1;
14456 }
14457
14458 static INLINE bool starts_memory_operand (char c)
14459 {
14460   return ISDIGIT (c)
14461          || is_name_beginner (c)
14462          || strchr ("([\"+-!~", c);
14463 }
14464
14465 /* Parse OPERAND_STRING into the i386_insn structure I.  Returns zero
14466    on error.

         The operand is classified in this order:
           - a register, optionally followed by `{...}' vector operations;
             a segment register followed by `:' instead introduces a memory
             reference carrying a segment override;
           - an immediate, introduced by IMMEDIATE_PREFIX;
           - an RC/SAE immediate, as recognized by RC_SAE_immediate ();
           - a memory reference of the general shape disp(base,index,scale),
             optionally followed by `{...}' vector operations.
         A leading ABSOLUTE_PREFIX is consumed when the current template is
         a jump, and sets i.jumpabsolute.  Results are recorded in I for
         operand number THIS_OPERAND.  Returns 1 on success.  Most failure
         paths report the problem through as_bad () before returning 0;
         where a helper signalled the failure, 0 is returned without a
         further message from this function.  */
14467
14468 static int
14469 i386_att_operand (char *operand_string)
14470 {
14471   const reg_entry *r;
14472   char *end_op;
14473   char *op_string = operand_string;
14474
        /* Skip at most one leading blank; the same single-step skip is used
           after every token below.  */
14475   if (is_space_char (*op_string))
14476     ++op_string;
14477
14478   /* We check for an absolute prefix (differentiating,
14479      for example, 'jmp pc_relative_label' from 'jmp *absolute_label'.  */
14480   if (*op_string == ABSOLUTE_PREFIX
14481       && current_templates.start->opcode_modifier.jump)
14482     {
14483       ++op_string;
14484       if (is_space_char (*op_string))
14485         ++op_string;
14486       i.jumpabsolute = true;
14487     }
14488
14489   /* Check if operand is a register.  */
14490   if ((r = parse_register (op_string, &end_op)) != NULL)
14491     {
14492       i386_operand_type temp;
14493
            /* bad_reg is a failure sentinel: give up without printing
               anything here (presumably parse_register already issued the
               diagnostic -- TODO confirm).  */
14494       if (r == &bad_reg)
14495         return 0;
14496
14497       /* Check for a segment override by searching for ':' after a
14498          segment register.  */
14499       op_string = end_op;
14500       if (is_space_char (*op_string))
14501         ++op_string;
14502       if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
14503         {
                /* Record the override in the slot of the memory operand
                   that is about to be parsed.  */
14504           i.seg[i.mem_operands] = r;
14505
14506           /* Skip the ':' and whitespace.  */
14507           ++op_string;
14508           if (is_space_char (*op_string))
14509             ++op_string;
14510
14511           /* Handle case of %es:*foo.  */
14512           if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX
14513               && current_templates.start->opcode_modifier.jump)
14514             {
14515               ++op_string;
14516               if (is_space_char (*op_string))
14517                 ++op_string;
14518               i.jumpabsolute = true;
14519             }
14520
                /* What follows the override must look like a memory
                   operand; it is then parsed by the shared code at
                   do_memory_reference further down.  */
14521           if (!starts_memory_operand (*op_string))
14522             {
14523               as_bad (_("bad memory operand `%s'"), op_string);
14524               return 0;
14525             }
14526           goto do_memory_reference;
14527         }
14528
14529       /* Handle vector operations.  */
14530       if (*op_string == '{')
14531         {
14532           op_string = check_VecOperations (op_string);
14533           if (op_string == NULL)
14534             return 0;
14535         }
14536
            /* Nothing else may trail a plain register operand.  */
14537       if (*op_string)
14538         {
14539           as_bad (_("junk `%s' after register"), op_string);
14540           return 0;
14541         }
14542
14543        /* Reject pseudo registers for .insn.  */
14544       if (dot_insn () && r->reg_type.bitfield.class == ClassNone)
14545         {
14546           as_bad (_("`%s%s' cannot be used here"),
14547                   register_prefix, r->reg_name);
14548           return 0;
14549         }
14550
            /* Merge the register's type (with baseindex cleared in the
               local copy) into the operand's type, then record the
               register itself and count it.  */
14551       temp = r->reg_type;
14552       temp.bitfield.baseindex = 0;
14553       i.types[this_operand] = operand_type_or (i.types[this_operand],
14554                                                temp);
14555       i.types[this_operand].bitfield.unspecified = 0;
14556       i.op[this_operand].regs = r;
14557       i.reg_operands++;
14558
14559       /* A GPR may follow an RC or SAE immediate only if a (vector) register
14560          operand was also present earlier on.  */
14561       if (i.rounding.type != rc_none && temp.bitfield.class == Reg
14562           && i.reg_operands == 1)
14563         {
14564           unsigned int j;
14565
                /* Find the table entry for the rounding type so that its
                   name can be printed.  NOTE(review): if no entry matched,
                   j would equal ARRAY_SIZE (RC_NamesTable) and the access
                   below would be out of bounds; this relies on
                   i.rounding.type always originating from the table.  */
14566           for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
14567             if (i.rounding.type == RC_NamesTable[j].type)
14568               break;
14569           as_bad (_("`%s': misplaced `{%s}'"),
14570                   insn_name (current_templates.start), RC_NamesTable[j].name);
14571           return 0;
14572         }
14573     }
14574   else if (*op_string == REGISTER_PREFIX)
14575     {
            /* Starts like a register, but parse_register did not accept
               it.  */
14576       as_bad (_("bad register name `%s'"), op_string);
14577       return 0;
14578     }
14579   else if (*op_string == IMMEDIATE_PREFIX)
14580     {
14581       ++op_string;
14582       if (i.jumpabsolute)
14583         {
14584           as_bad (_("immediate operand illegal with absolute jump"));
14585           return 0;
14586         }
14587       if (!i386_immediate (op_string))
14588         return 0;
            /* A rounding type that is already set means an RC/SAE operand
               was seen before this immediate, which is rejected.  */
14589       if (i.rounding.type != rc_none)
14590         {
14591           as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
14592                   insn_name (current_templates.start));
14593           return 0;
14594         }
14595     }
        /* Note that the helper is handed the original OPERAND_STRING here,
           not the advanced op_string.  */
14596   else if (RC_SAE_immediate (operand_string))
14597     {
14598       /* If it is a RC or SAE immediate, do the necessary placement check:
14599          Only another immediate or a GPR may precede it.  */
14600       if (i.mem_operands || i.reg_operands + i.imm_operands > 1
14601           || (i.reg_operands == 1
14602               && i.op[0].regs->reg_type.bitfield.class != Reg))
14603         {
14604           as_bad (_("`%s': misplaced `%s'"),
14605                   insn_name (current_templates.start), operand_string);
14606           return 0;
14607         }
14608     }
14609   else if (starts_memory_operand (*op_string))
14610     {
14611       /* This is a memory reference of some sort.  */
14612       char *base_string;
14613
14614       /* Start and end of displacement string expression (if found).  */
14615       char *displacement_string_start;
14616       char *displacement_string_end;
14617
            /* Also reached by goto from the segment-override case above,
               with op_string already advanced past the `:'.  */
14618     do_memory_reference:
14619       /* Check for base index form.  We detect the base index form by
14620          looking for an ')' at the end of the operand, searching
14621          for the '(' matching it, and finding a REGISTER_PREFIX or ','
14622          after the '('.  */
14623       base_string = op_string + strlen (op_string);
14624
14625       /* Handle vector operations.  */
            /* Step back onto the last character of the operand, ignoring
               one trailing blank.  */
14626       --base_string;
14627       if (is_space_char (*base_string))
14628         --base_string;
14629
14630       if (*base_string == '}')
14631         {
14632           char *vop_start = NULL;
14633
                /* Scan backwards for the `{' that opens the trailing
                   group.  A double quote ends the scan.  Once a `{' is
                   found, look at what precedes it (skipping one blank): if
                   that is another `}', an earlier group is adjacent, so
                   forget this start and keep scanning; otherwise stop with
                   vop_start at this `{' and base_string on the character
                   before it.  */
14634           while (base_string-- > op_string)
14635             {
14636               if (*base_string == '"')
14637                 break;
14638               if (*base_string != '{')
14639                 continue;
14640
14641               vop_start = base_string;
14642
14643               --base_string;
14644               if (is_space_char (*base_string))
14645                 --base_string;
14646
14647               if (*base_string != '}')
14648                 break;
14649
14650               vop_start = NULL;
14651             }
14652
14653           if (!vop_start)
14654             {
14655               as_bad (_("unbalanced figure braces"));
14656               return 0;
14657             }
14658
                /* Hand the text from the first `{' onwards to the vector
                   operation parser; only success or failure matters
                   here.  */
14659           if (check_VecOperations (vop_start) == NULL)
14660             return 0;
14661         }
14662
14663       /* If we only have a displacement, set-up for it to be parsed later.  */
            /* base_string is the last character of the operand proper, so
               the end is one past it.  */
14664       displacement_string_start = op_string;
14665       displacement_string_end = base_string + 1;
14666
14667       if (*base_string == ')')
14668         {
14669           char *temp_string;
14670           unsigned int parens_not_balanced = 0;
14671           bool in_quotes = false;
14672
14673           /* We've already checked that the number of left & right ()'s are
14674              equal, and that there's a matching set of double quotes.  */
                /* Walk forward up to the closing `)', tracking nesting
                   depth outside double-quoted text (a `\"' pair is
                   skipped).  Every `(' seen at depth zero overwrites
                   base_string, so it ends up on the `(' opening the last
                   top-level group.  */
14675           end_op = base_string;
14676           for (temp_string = op_string; temp_string < end_op; temp_string++)
14677             {
14678               if (*temp_string == '\\' && temp_string[1] == '"')
14679                 ++temp_string;
14680               else if (*temp_string == '"')
14681                 in_quotes = !in_quotes;
14682               else if (!in_quotes)
14683                 {
14684                   if (*temp_string == '(' && !parens_not_balanced++)
14685                     base_string = temp_string;
14686                   if (*temp_string == ')')
14687                     --parens_not_balanced;
14688                 }
14689             }
14690
                /* Remember where the `(' is: if this turns out to be
                   base/index syntax, the displacement ends there.  */
14691           temp_string = base_string;
14692
14693           /* Skip past '(' and whitespace.  */
14694           gas_assert (*base_string == '(');
14695           ++base_string;
14696           if (is_space_char (*base_string))
14697             ++base_string;
14698
                /* A `,' right after the `(' means there is no base
                   register; otherwise try to parse one.  If neither
                   applies (and the text does not start with
                   REGISTER_PREFIX), displacement_string_end is left
                   untouched, i.e. the parenthesized text stays part of the
                   displacement expression.  */
14699           if (*base_string == ','
14700               || ((i.base_reg = parse_register (base_string, &end_op))
14701                   != NULL))
14702             {
14703               displacement_string_end = temp_string;
14704
14705               i.types[this_operand].bitfield.baseindex = 1;
14706
14707               if (i.base_reg)
14708                 {
14709                   if (i.base_reg == &bad_reg)
14710                     return 0;
14711                   base_string = end_op;
14712                   if (is_space_char (*base_string))
14713                     ++base_string;
14714                 }
14715
14716               /* There may be an index reg or scale factor here.  */
14717               if (*base_string == ',')
14718                 {
14719                   ++base_string;
14720                   if (is_space_char (*base_string))
14721                     ++base_string;
14722
14723                   if ((i.index_reg = parse_register (base_string, &end_op))
14724                       != NULL)
14725                     {
14726                       if (i.index_reg == &bad_reg)
14727                         return 0;
14728                       base_string = end_op;
14729                       if (is_space_char (*base_string))
14730                         ++base_string;
14731                       if (*base_string == ',')
14732                         {
14733                           ++base_string;
14734                           if (is_space_char (*base_string))
14735                             ++base_string;
14736                         }
14737                       else if (*base_string != ')')
14738                         {
14739                           as_bad (_("expecting `,' or `)' "
14740                                     "after index register in `%s'"),
14741                                   operand_string);
14742                           return 0;
14743                         }
14744                     }
14745                   else if (*base_string == REGISTER_PREFIX)
14746                     {
                            /* Cut the text at the next `,' so that only
                               the offending name is printed.  The NUL is
                               written into the caller's buffer and is not
                               restored.  */
14747                       end_op = strchr (base_string, ',');
14748                       if (end_op)
14749                         *end_op = '\0';
14750                       as_bad (_("bad register name `%s'"), base_string);
14751                       return 0;
14752                     }
14753
14754                   /* Check for scale factor.  */
14755                   if (*base_string != ')')
14756                     {
14757                       char *end_scale = i386_scale (base_string);
14758
14759                       if (!end_scale)
14760                         return 0;
14761
14762                       base_string = end_scale;
14763                       if (is_space_char (*base_string))
14764                         ++base_string;
14765                       if (*base_string != ')')
14766                         {
14767                           as_bad (_("expecting `)' "
14768                                     "after scale factor in `%s'"),
14769                                   operand_string);
14770                           return 0;
14771                         }
14772                     }
                        /* The `)' follows the `,' directly: neither an
                           index register nor a scale factor was given.  */
14773                   else if (!i.index_reg)
14774                     {
14775                       as_bad (_("expecting index register or scale factor "
14776                                 "after `,'; got '%c'"),
14777                               *base_string);
14778                       return 0;
14779                     }
14780                 }
14781               else if (*base_string != ')')
14782                 {
14783                   as_bad (_("expecting `,' or `)' "
14784                             "after base register in `%s'"),
14785                           operand_string);
14786                   return 0;
14787                 }
14788             }
14789           else if (*base_string == REGISTER_PREFIX)
14790             {
                    /* Same truncating diagnostic as for a bad index
                       register above.  */
14791               end_op = strchr (base_string, ',');
14792               if (end_op)
14793                 *end_op = '\0';
14794               as_bad (_("bad register name `%s'"), base_string);
14795               return 0;
14796             }
14797         }
14798
14799       /* If there's an expression beginning the operand, parse it,
14800          assuming displacement_string_start and
14801          displacement_string_end are meaningful.  */
14802       if (displacement_string_start != displacement_string_end)
14803         {
14804           if (!i386_displacement (displacement_string_start,
14805                                   displacement_string_end))
14806             return 0;
14807         }
14808
14809       /* Special case for (%dx) while doing input/output op.  */
14810       if (i.base_reg
14811           && i.base_reg->reg_type.bitfield.instance == RegD
14812           && i.base_reg->reg_type.bitfield.word
14813           && i.index_reg == 0
14814           && i.log2_scale_factor == 0
14815           && i.seg[i.mem_operands] == 0
14816           && !operand_type_check (i.types[this_operand], disp))
14817         {
                /* Turn the operand into a register operand for the base
                   register and flag it.  Neither i.reg_operands nor
                   i.mem_operands is incremented on this path.  */
14818           i.types[this_operand] = i.base_reg->reg_type;
14819           i.op[this_operand].regs = i.base_reg;
14820           i.base_reg = NULL;
14821           i.input_output_operand = true;
14822           return 1;
14823         }
14824
            /* Ordinary memory operand: let i386_index_check vet it, then
               flag the operand as memory and count it.  */
14825       if (i386_index_check (operand_string) == 0)
14826         return 0;
14827       i.flags[this_operand] |= Operand_Mem;
14828       i.mem_operands++;
14829     }
14830   else
14831     {
14832       /* It's not a memory operand; argh!  */
14833       as_bad (_("invalid char %s beginning operand %d `%s'"),
14834               output_invalid (*op_string),
14835               this_operand + 1,
14836               op_string);
14837       return 0;
14838     }
14839   return 1;                     /* Normal return.  */
14840 }
14841 \f
14842 /* Calculate the maximum variable size (i.e., excluding fr_fix)
14843    that an rs_machine_dependent frag may reach.  */
14844
14845 unsigned int
14846 i386_frag_max_var (fragS *frag)
14847 {
14848   /* The only relaxable frags are for jumps.
14849      Unconditional jumps can grow by 4 bytes and others by 5 bytes.  */
14850   gas_assert (frag->fr_type == rs_machine_dependent);
14851   return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
14852 }
14853
14854 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14855 static int
14856 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
14857 {
14858   /* STT_GNU_IFUNC symbol must go through PLT.  */
14859   if ((symbol_get_bfdsym (fr_symbol)->flags
14860        & BSF_GNU_INDIRECT_FUNCTION) != 0)
14861     return 0;
14862
14863   if (!S_IS_EXTERNAL (fr_symbol))
14864     /* Symbol may be weak or local.  */
14865     return !S_IS_WEAK (fr_symbol);
14866
14867   /* Global symbols with non-default visibility can't be preempted. */
14868   if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
14869     return 1;
14870
14871   if (fr_var != NO_RELOC)
14872     switch ((enum bfd_reloc_code_real) fr_var)
14873       {
14874       case BFD_RELOC_386_PLT32:
14875       case BFD_RELOC_X86_64_PLT32:
14876         /* Symbol with PLT relocation may be preempted. */
14877         return 0;
14878       default:
14879         abort ();
14880       }
14881
14882   /* Global symbols with default visibility in a shared library may be
14883      preempted by another definition.  */
14884   return !shared;
14885 }
14886 #endif
14887
14888 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
14889    Note also work for Skylake and Cascadelake.
14890 ---------------------------------------------------------------------
14891 |   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
14892 | ------  | ----------- | ------- | -------- |
14893 |   Jo    |      N      |    N    |     Y    |
14894 |   Jno   |      N      |    N    |     Y    |
14895 |  Jc/Jb  |      Y      |    N    |     Y    |
14896 | Jae/Jnb |      Y      |    N    |     Y    |
14897 |  Je/Jz  |      Y      |    Y    |     Y    |
14898 | Jne/Jnz |      Y      |    Y    |     Y    |
14899 | Jna/Jbe |      Y      |    N    |     Y    |
14900 | Ja/Jnbe |      Y      |    N    |     Y    |
14901 |   Js    |      N      |    N    |     Y    |
14902 |   Jns   |      N      |    N    |     Y    |
14903 |  Jp/Jpe |      N      |    N    |     Y    |
14904 | Jnp/Jpo |      N      |    N    |     Y    |
14905 | Jl/Jnge |      Y      |    Y    |     Y    |
14906 | Jge/Jnl |      Y      |    Y    |     Y    |
14907 | Jle/Jng |      Y      |    Y    |     Y    |
14908 | Jg/Jnle |      Y      |    Y    |     Y    |
14909 ---------------------------------------------------------------------  */
14910 static int
14911 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
14912 {
14913   if (mf_cmp == mf_cmp_alu_cmp)
14914     return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
14915             || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
14916   if (mf_cmp == mf_cmp_incdec)
14917     return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
14918             || mf_jcc == mf_jcc_jle);
14919   if (mf_cmp == mf_cmp_test_and)
14920     return 1;
14921   return 0;
14922 }
14923
14924 /* Return the next non-empty frag.  */
14925
14926 static fragS *
14927 i386_next_non_empty_frag (fragS *fragP)
14928 {
14929   /* There may be a frag with a ".fill 0" when there is no room in
14930      the current frag for frag_grow in output_insn.  */
14931   for (fragP = fragP->fr_next;
14932        (fragP != NULL
14933         && fragP->fr_type == rs_fill
14934         && fragP->fr_fix == 0);
14935        fragP = fragP->fr_next)
14936     ;
14937   return fragP;
14938 }
14939
14940 /* Return the next jcc frag after BRANCH_PADDING.  */
14941
14942 static fragS *
14943 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
14944 {
14945   fragS *branch_fragP;
14946   if (!pad_fragP)
14947     return NULL;
14948
14949   if (pad_fragP->fr_type == rs_machine_dependent
14950       && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
14951           == BRANCH_PADDING))
14952     {
14953       branch_fragP = i386_next_non_empty_frag (pad_fragP);
14954       if (branch_fragP->fr_type != rs_machine_dependent)
14955         return NULL;
14956       if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
14957           && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
14958                                    pad_fragP->tc_frag_data.mf_type))
14959         return branch_fragP;
14960     }
14961
14962   return NULL;
14963 }
14964
14965 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags.  */
14966
14967 static void
14968 i386_classify_machine_dependent_frag (fragS *fragP)
14969 {
14970   fragS *cmp_fragP;
14971   fragS *pad_fragP;
14972   fragS *branch_fragP;
14973   fragS *next_fragP;
14974   unsigned int max_prefix_length;
14975
14976   if (fragP->tc_frag_data.classified)
14977     return;
14978
14979   /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING.  Convert
14980      FUSED_JCC_PADDING and merge BRANCH_PADDING.  */
14981   for (next_fragP = fragP;
14982        next_fragP != NULL;
14983        next_fragP = next_fragP->fr_next)
14984     {
14985       next_fragP->tc_frag_data.classified = 1;
14986       if (next_fragP->fr_type == rs_machine_dependent)
14987         switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
14988           {
14989           case BRANCH_PADDING:
14990             /* The BRANCH_PADDING frag must be followed by a branch
14991                frag.  */
14992             branch_fragP = i386_next_non_empty_frag (next_fragP);
14993             next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
14994             break;
14995           case FUSED_JCC_PADDING:
14996             /* Check if this is a fused jcc:
14997                FUSED_JCC_PADDING
14998                CMP like instruction
14999                BRANCH_PADDING
15000                COND_JUMP
15001                */
15002             cmp_fragP = i386_next_non_empty_frag (next_fragP);
15003             pad_fragP = i386_next_non_empty_frag (cmp_fragP);
15004             branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
15005             if (branch_fragP)
15006               {
15007                 /* The BRANCH_PADDING frag is merged with the
15008                    FUSED_JCC_PADDING frag.  */
15009                 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
15010                 /* CMP like instruction size.  */
15011                 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
15012                 frag_wane (pad_fragP);
15013                 /* Skip to branch_fragP.  */
15014                 next_fragP = branch_fragP;
15015               }
15016             else if (next_fragP->tc_frag_data.max_prefix_length)
15017               {
15018                 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
15019                    a fused jcc.  */
15020                 next_fragP->fr_subtype
15021                   = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
15022                 next_fragP->tc_frag_data.max_bytes
15023                   = next_fragP->tc_frag_data.max_prefix_length;
15024                 /* This will be updated in the BRANCH_PREFIX scan.  */
15025                 next_fragP->tc_frag_data.max_prefix_length = 0;
15026               }
15027             else
15028               frag_wane (next_fragP);
15029             break;
15030           }
15031     }
15032
15033   /* Stop if there is no BRANCH_PREFIX.  */
15034   if (!align_branch_prefix_size)
15035     return;
15036
15037   /* Scan for BRANCH_PREFIX.  */
15038   for (; fragP != NULL; fragP = fragP->fr_next)
15039     {
15040       if (fragP->fr_type != rs_machine_dependent
15041           || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
15042               != BRANCH_PREFIX))
15043         continue;
15044
15045       /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
15046          COND_JUMP_PREFIX.  */
15047       max_prefix_length = 0;
15048       for (next_fragP = fragP;
15049            next_fragP != NULL;
15050            next_fragP = next_fragP->fr_next)
15051         {
15052           if (next_fragP->fr_type == rs_fill)
15053             /* Skip rs_fill frags.  */
15054             continue;
15055           else if (next_fragP->fr_type != rs_machine_dependent)
15056             /* Stop for all other frags.  */
15057             break;
15058
15059           /* rs_machine_dependent frags.  */
15060           if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15061               == BRANCH_PREFIX)
15062             {
15063               /* Count BRANCH_PREFIX frags.  */
15064               if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
15065                 {
15066                   max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
15067                   frag_wane (next_fragP);
15068                 }
15069               else
15070                 max_prefix_length
15071                   += next_fragP->tc_frag_data.max_bytes;
15072             }
15073           else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15074                     == BRANCH_PADDING)
15075                    || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15076                        == FUSED_JCC_PADDING))
15077             {
15078               /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING.  */
15079               fragP->tc_frag_data.u.padding_fragP = next_fragP;
15080               break;
15081             }
15082           else
15083             /* Stop for other rs_machine_dependent frags.  */
15084             break;
15085         }
15086
15087       fragP->tc_frag_data.max_prefix_length = max_prefix_length;
15088
15089       /* Skip to the next frag.  */
15090       fragP = next_fragP;
15091     }
15092 }
15093
15094 /* Compute padding size for
15095
15096         FUSED_JCC_PADDING
15097         CMP like instruction
15098         BRANCH_PADDING
15099         COND_JUMP/UNCOND_JUMP
15100
15101    or
15102
15103         BRANCH_PADDING
15104         COND_JUMP/UNCOND_JUMP
15105  */
15106
15107 static int
15108 i386_branch_padding_size (fragS *fragP, offsetT address)
15109 {
15110   unsigned int offset, size, padding_size;
15111   fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
15112
15113   /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag.  */
15114   if (!address)
15115     address = fragP->fr_address;
15116   address += fragP->fr_fix;
15117
15118   /* CMP like instrunction size.  */
15119   size = fragP->tc_frag_data.cmp_size;
15120
15121   /* The base size of the branch frag.  */
15122   size += branch_fragP->fr_fix;
15123
15124   /* Add opcode and displacement bytes for the rs_machine_dependent
15125      branch frag.  */
15126   if (branch_fragP->fr_type == rs_machine_dependent)
15127     size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
15128
15129   /* Check if branch is within boundary and doesn't end at the last
15130      byte.  */
15131   offset = address & ((1U << align_branch_power) - 1);
15132   if ((offset + size) >= (1U << align_branch_power))
15133     /* Padding needed to avoid crossing boundary.  */
15134     padding_size = (1U << align_branch_power) - offset;
15135   else
15136     /* No padding needed.  */
15137     padding_size = 0;
15138
15139   /* The return value may be saved in tc_frag_data.length which is
15140      unsigned byte.  */
15141   if (!fits_in_unsigned_byte (padding_size))
15142     abort ();
15143
15144   return padding_size;
15145 }
15146
15147 /* i386_generic_table_relax_frag()
15148
15149    Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
15150    grow/shrink padding to align branch frags.  Hand others to
15151    relax_frag().  */
15152
15153 long
15154 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
15155 {
15156   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
15157       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
15158     {
15159       long padding_size = i386_branch_padding_size (fragP, 0);
15160       long grow = padding_size - fragP->tc_frag_data.length;
15161
15162       /* When the BRANCH_PREFIX frag is used, the computed address
15163          must match the actual address and there should be no padding.  */
15164       if (fragP->tc_frag_data.padding_address
15165           && (fragP->tc_frag_data.padding_address != fragP->fr_address
15166               || padding_size))
15167         abort ();
15168
15169       /* Update the padding size.  */
15170       if (grow)
15171         fragP->tc_frag_data.length = padding_size;
15172
15173       return grow;
15174     }
15175   else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
15176     {
15177       fragS *padding_fragP, *next_fragP;
15178       long padding_size, left_size, last_size;
15179
15180       padding_fragP = fragP->tc_frag_data.u.padding_fragP;
15181       if (!padding_fragP)
15182         /* Use the padding set by the leading BRANCH_PREFIX frag.  */
15183         return (fragP->tc_frag_data.length
15184                 - fragP->tc_frag_data.last_length);
15185
15186       /* Compute the relative address of the padding frag in the very
15187         first time where the BRANCH_PREFIX frag sizes are zero.  */
15188       if (!fragP->tc_frag_data.padding_address)
15189         fragP->tc_frag_data.padding_address
15190           = padding_fragP->fr_address - (fragP->fr_address - stretch);
15191
15192       /* First update the last length from the previous interation.  */
15193       left_size = fragP->tc_frag_data.prefix_length;
15194       for (next_fragP = fragP;
15195            next_fragP != padding_fragP;
15196            next_fragP = next_fragP->fr_next)
15197         if (next_fragP->fr_type == rs_machine_dependent
15198             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15199                 == BRANCH_PREFIX))
15200           {
15201             if (left_size)
15202               {
15203                 int max = next_fragP->tc_frag_data.max_bytes;
15204                 if (max)
15205                   {
15206                     int size;
15207                     if (max > left_size)
15208                       size = left_size;
15209                     else
15210                       size = max;
15211                     left_size -= size;
15212                     next_fragP->tc_frag_data.last_length = size;
15213                   }
15214               }
15215             else
15216               next_fragP->tc_frag_data.last_length = 0;
15217           }
15218
15219       /* Check the padding size for the padding frag.  */
15220       padding_size = i386_branch_padding_size
15221         (padding_fragP, (fragP->fr_address
15222                          + fragP->tc_frag_data.padding_address));
15223
15224       last_size = fragP->tc_frag_data.prefix_length;
15225       /* Check if there is change from the last interation.  */
15226       if (padding_size == last_size)
15227         {
15228           /* Update the expected address of the padding frag.  */
15229           padding_fragP->tc_frag_data.padding_address
15230             = (fragP->fr_address + padding_size
15231                + fragP->tc_frag_data.padding_address);
15232           return 0;
15233         }
15234
15235       if (padding_size > fragP->tc_frag_data.max_prefix_length)
15236         {
15237           /* No padding if there is no sufficient room.  Clear the
15238              expected address of the padding frag.  */
15239           padding_fragP->tc_frag_data.padding_address = 0;
15240           padding_size = 0;
15241         }
15242       else
15243         /* Store the expected address of the padding frag.  */
15244         padding_fragP->tc_frag_data.padding_address
15245           = (fragP->fr_address + padding_size
15246              + fragP->tc_frag_data.padding_address);
15247
15248       fragP->tc_frag_data.prefix_length = padding_size;
15249
15250       /* Update the length for the current interation.  */
15251       left_size = padding_size;
15252       for (next_fragP = fragP;
15253            next_fragP != padding_fragP;
15254            next_fragP = next_fragP->fr_next)
15255         if (next_fragP->fr_type == rs_machine_dependent
15256             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15257                 == BRANCH_PREFIX))
15258           {
15259             if (left_size)
15260               {
15261                 int max = next_fragP->tc_frag_data.max_bytes;
15262                 if (max)
15263                   {
15264                     int size;
15265                     if (max > left_size)
15266                       size = left_size;
15267                     else
15268                       size = max;
15269                     left_size -= size;
15270                     next_fragP->tc_frag_data.length = size;
15271                   }
15272               }
15273             else
15274               next_fragP->tc_frag_data.length = 0;
15275           }
15276
15277       return (fragP->tc_frag_data.length
15278               - fragP->tc_frag_data.last_length);
15279     }
15280   return relax_frag (segment, fragP, stretch);
15281 }
15282
15283 /* md_estimate_size_before_relax()
15284
15285    Called just before relax() for rs_machine_dependent frags.  The x86
15286    assembler uses these frags to handle variable size jump
15287    instructions.
15288
15289    Any symbol that is now undefined will not become defined.
15290    Return the correct fr_subtype in the frag.
15291    Return the initial "guess for variable size of frag" to caller.
15292    The guess is actually the growth beyond the fixed part.  Whatever
15293    we do to grow the fixed or variable part contributes to our
15294    returned value.  */
15295
15296 int
15297 md_estimate_size_before_relax (fragS *fragP, segT segment)
15298 {
15299   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
15300       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
15301       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
15302     {
15303       i386_classify_machine_dependent_frag (fragP);
15304       return fragP->tc_frag_data.length;
15305     }
15306
15307   /* We've already got fragP->fr_subtype right;  all we have to do is
15308      check for un-relaxable symbols.  On an ELF system, we can't relax
15309      an externally visible symbol, because it may be overridden by a
15310      shared library.  */
15311   if (S_GET_SEGMENT (fragP->fr_symbol) != segment
15312 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15313       || (IS_ELF
15314           && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
15315                                                 fragP->fr_var))
15316 #endif
15317 #if defined (OBJ_COFF) && defined (TE_PE)
15318       || (OUTPUT_FLAVOR == bfd_target_coff_flavour
15319           && S_IS_WEAK (fragP->fr_symbol))
15320 #endif
15321       )
15322     {
15323       /* Symbol is undefined in this segment, or we need to keep a
15324          reloc so that weak symbols can be overridden.  */
15325       int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
15326       enum bfd_reloc_code_real reloc_type;
15327       unsigned char *opcode;
15328       int old_fr_fix;
15329       fixS *fixP = NULL;
15330
15331       if (fragP->fr_var != NO_RELOC)
15332         reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
15333       else if (size == 2)
15334         reloc_type = BFD_RELOC_16_PCREL;
15335 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15336       else if (fragP->tc_frag_data.code == CODE_64BIT
15337                && fragP->fr_offset == 0
15338                && need_plt32_p (fragP->fr_symbol))
15339         reloc_type = BFD_RELOC_X86_64_PLT32;
15340 #endif
15341       else
15342         reloc_type = BFD_RELOC_32_PCREL;
15343
15344       old_fr_fix = fragP->fr_fix;
15345       opcode = (unsigned char *) fragP->fr_opcode;
15346
15347       switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
15348         {
15349         case UNCOND_JUMP:
15350           /* Make jmp (0xeb) a (d)word displacement jump.  */
15351           opcode[0] = 0xe9;
15352           fragP->fr_fix += size;
15353           fixP = fix_new (fragP, old_fr_fix, size,
15354                           fragP->fr_symbol,
15355                           fragP->fr_offset, 1,
15356                           reloc_type);
15357           break;
15358
15359         case COND_JUMP86:
15360           if (size == 2
15361               && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
15362             {
15363               /* Negate the condition, and branch past an
15364                  unconditional jump.  */
15365               opcode[0] ^= 1;
15366               opcode[1] = 3;
15367               /* Insert an unconditional jump.  */
15368               opcode[2] = 0xe9;
15369               /* We added two extra opcode bytes, and have a two byte
15370                  offset.  */
15371               fragP->fr_fix += 2 + 2;
15372               fix_new (fragP, old_fr_fix + 2, 2,
15373                        fragP->fr_symbol,
15374                        fragP->fr_offset, 1,
15375                        reloc_type);
15376               break;
15377             }
15378           /* Fall through.  */
15379
15380         case COND_JUMP:
15381           if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
15382             {
15383               fragP->fr_fix += 1;
15384               fixP = fix_new (fragP, old_fr_fix, 1,
15385                               fragP->fr_symbol,
15386                               fragP->fr_offset, 1,
15387                               BFD_RELOC_8_PCREL);
15388               fixP->fx_signed = 1;
15389               break;
15390             }
15391
15392           /* This changes the byte-displacement jump 0x7N
15393              to the (d)word-displacement jump 0x0f,0x8N.  */
15394           opcode[1] = opcode[0] + 0x10;
15395           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
15396           /* We've added an opcode byte.  */
15397           fragP->fr_fix += 1 + size;
15398           fixP = fix_new (fragP, old_fr_fix + 1, size,
15399                           fragP->fr_symbol,
15400                           fragP->fr_offset, 1,
15401                           reloc_type);
15402           break;
15403
15404         default:
15405           BAD_CASE (fragP->fr_subtype);
15406           break;
15407         }
15408
15409       /* All jumps handled here are signed, but don't unconditionally use a
15410          signed limit check for 32 and 16 bit jumps as we want to allow wrap
15411          around at 4G (outside of 64-bit mode) and 64k.  */
15412       if (size == 4 && flag_code == CODE_64BIT)
15413         fixP->fx_signed = 1;
15414
15415       frag_wane (fragP);
15416       return fragP->fr_fix - old_fr_fix;
15417     }
15418
15419   /* Guess size depending on current relax state.  Initially the relax
15420      state will correspond to a short jump and we return 1, because
15421      the variable part of the frag (the branch offset) is one byte
15422      long.  However, we can relax a section more than once and in that
15423      case we must either set fr_subtype back to the unrelaxed state,
15424      or return the value for the appropriate branch.  */
15425   return md_relax_table[fragP->fr_subtype].rlx_length;
15426 }
15427
15428 /* Called after relax() is finished.
15429
15430    In:  Address of frag.
15431         fr_type == rs_machine_dependent.
15432         fr_subtype is what the address relaxed to.
15433
15434    Out: Any fixSs and constants are set up.
15435         Caller will turn frag into a ".space 0".  */
15436
15437 void
15438 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
15439                  fragS *fragP)
15440 {
15441   unsigned char *opcode;
15442   unsigned char *where_to_put_displacement = NULL;
15443   offsetT target_address;
15444   offsetT opcode_address;
15445   unsigned int extension = 0;
15446   offsetT displacement_from_opcode_start;
15447
15448   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
15449       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
15450       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
15451     {
15452       /* Generate nop padding.  */
15453       unsigned int size = fragP->tc_frag_data.length;
15454       if (size)
15455         {
15456           if (size > fragP->tc_frag_data.max_bytes)
15457             abort ();
15458
15459           if (flag_debug)
15460             {
15461               const char *msg;
15462               const char *branch = "branch";
15463               const char *prefix = "";
15464               fragS *padding_fragP;
15465               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
15466                   == BRANCH_PREFIX)
15467                 {
15468                   padding_fragP = fragP->tc_frag_data.u.padding_fragP;
15469                   switch (fragP->tc_frag_data.default_prefix)
15470                     {
15471                     default:
15472                       abort ();
15473                       break;
15474                     case CS_PREFIX_OPCODE:
15475                       prefix = " cs";
15476                       break;
15477                     case DS_PREFIX_OPCODE:
15478                       prefix = " ds";
15479                       break;
15480                     case ES_PREFIX_OPCODE:
15481                       prefix = " es";
15482                       break;
15483                     case FS_PREFIX_OPCODE:
15484                       prefix = " fs";
15485                       break;
15486                     case GS_PREFIX_OPCODE:
15487                       prefix = " gs";
15488                       break;
15489                     case SS_PREFIX_OPCODE:
15490                       prefix = " ss";
15491                       break;
15492                     }
15493                   if (padding_fragP)
15494                     msg = _("%s:%u: add %d%s at 0x%llx to align "
15495                             "%s within %d-byte boundary\n");
15496                   else
15497                     msg = _("%s:%u: add additional %d%s at 0x%llx to "
15498                             "align %s within %d-byte boundary\n");
15499                 }
15500               else
15501                 {
15502                   padding_fragP = fragP;
15503                   msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
15504                           "%s within %d-byte boundary\n");
15505                 }
15506
15507               if (padding_fragP)
15508                 switch (padding_fragP->tc_frag_data.branch_type)
15509                   {
15510                   case align_branch_jcc:
15511                     branch = "jcc";
15512                     break;
15513                   case align_branch_fused:
15514                     branch = "fused jcc";
15515                     break;
15516                   case align_branch_jmp:
15517                     branch = "jmp";
15518                     break;
15519                   case align_branch_call:
15520                     branch = "call";
15521                     break;
15522                   case align_branch_indirect:
15523                     branch = "indiret branch";
15524                     break;
15525                   case align_branch_ret:
15526                     branch = "ret";
15527                     break;
15528                   default:
15529                     break;
15530                   }
15531
15532               fprintf (stdout, msg,
15533                        fragP->fr_file, fragP->fr_line, size, prefix,
15534                        (long long) fragP->fr_address, branch,
15535                        1 << align_branch_power);
15536             }
15537           if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
15538             memset (fragP->fr_opcode,
15539                     fragP->tc_frag_data.default_prefix, size);
15540           else
15541             i386_generate_nops (fragP, (char *) fragP->fr_opcode,
15542                                 size, 0);
15543           fragP->fr_fix += size;
15544         }
15545       return;
15546     }
15547
15548   opcode = (unsigned char *) fragP->fr_opcode;
15549
15550   /* Address we want to reach in file space.  */
15551   target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
15552
15553   /* Address opcode resides at in file space.  */
15554   opcode_address = fragP->fr_address + fragP->fr_fix;
15555
15556   /* Displacement from opcode start to fill into instruction.  */
15557   displacement_from_opcode_start = target_address - opcode_address;
15558
15559   if ((fragP->fr_subtype & BIG) == 0)
15560     {
15561       /* Don't have to change opcode.  */
15562       extension = 1;            /* 1 opcode + 1 displacement  */
15563       where_to_put_displacement = &opcode[1];
15564     }
15565   else
15566     {
15567       if (no_cond_jump_promotion
15568           && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
15569         as_warn_where (fragP->fr_file, fragP->fr_line,
15570                        _("long jump required"));
15571
15572       switch (fragP->fr_subtype)
15573         {
15574         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
15575           extension = 4;                /* 1 opcode + 4 displacement  */
15576           opcode[0] = 0xe9;
15577           where_to_put_displacement = &opcode[1];
15578           break;
15579
15580         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
15581           extension = 2;                /* 1 opcode + 2 displacement  */
15582           opcode[0] = 0xe9;
15583           where_to_put_displacement = &opcode[1];
15584           break;
15585
15586         case ENCODE_RELAX_STATE (COND_JUMP, BIG):
15587         case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
15588           extension = 5;                /* 2 opcode + 4 displacement  */
15589           opcode[1] = opcode[0] + 0x10;
15590           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
15591           where_to_put_displacement = &opcode[2];
15592           break;
15593
15594         case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
15595           extension = 3;                /* 2 opcode + 2 displacement  */
15596           opcode[1] = opcode[0] + 0x10;
15597           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
15598           where_to_put_displacement = &opcode[2];
15599           break;
15600
15601         case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
15602           extension = 4;
15603           opcode[0] ^= 1;
15604           opcode[1] = 3;
15605           opcode[2] = 0xe9;
15606           where_to_put_displacement = &opcode[3];
15607           break;
15608
15609         default:
15610           BAD_CASE (fragP->fr_subtype);
15611           break;
15612         }
15613     }
15614
15615   /* If size if less then four we are sure that the operand fits,
15616      but if it's 4, then it could be that the displacement is larger
15617      then -/+ 2GB.  */
15618   if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
15619       && object_64bit
15620       && ((addressT) (displacement_from_opcode_start - extension
15621                       + ((addressT) 1 << 31))
15622           > (((addressT) 2 << 31) - 1)))
15623     {
15624       as_bad_where (fragP->fr_file, fragP->fr_line,
15625                     _("jump target out of range"));
15626       /* Make us emit 0.  */
15627       displacement_from_opcode_start = extension;
15628     }
15629   /* Now put displacement after opcode.  */
15630   md_number_to_chars ((char *) where_to_put_displacement,
15631                       (valueT) (displacement_from_opcode_start - extension),
15632                       DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
15633   fragP->fr_fix += extension;
15634 }
15635 \f
15636 /* Apply a fixup (fixP) to segment data, once it has been determined
15637    by our caller that we have all the info we need to fix it up.
15638
15639    Parameter valP is the pointer to the value of the bits.
15640
15641    On the 386, immediates, displacements, and data pointers are all in
15642    the same (little-endian) format, so we don't need to care about which
15643    we are handling.  */
15644
15645 void
15646 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
15647 {
15648   char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
15649   valueT value = *valP;
15650
15651 #if !defined (TE_Mach)
15652   if (fixP->fx_pcrel)
15653     {
15654       switch (fixP->fx_r_type)
15655         {
15656         default:
15657           break;
15658
15659         case BFD_RELOC_64:
15660           fixP->fx_r_type = BFD_RELOC_64_PCREL;
15661           break;
15662         case BFD_RELOC_32:
15663         case BFD_RELOC_X86_64_32S:
15664           fixP->fx_r_type = BFD_RELOC_32_PCREL;
15665           break;
15666         case BFD_RELOC_16:
15667           fixP->fx_r_type = BFD_RELOC_16_PCREL;
15668           break;
15669         case BFD_RELOC_8:
15670           fixP->fx_r_type = BFD_RELOC_8_PCREL;
15671           break;
15672         }
15673     }
15674
15675   if (fixP->fx_addsy != NULL
15676       && (fixP->fx_r_type == BFD_RELOC_32_PCREL
15677           || fixP->fx_r_type == BFD_RELOC_64_PCREL
15678           || fixP->fx_r_type == BFD_RELOC_16_PCREL
15679           || fixP->fx_r_type == BFD_RELOC_8_PCREL)
15680       && !use_rela_relocations)
15681     {
15682       /* This is a hack.  There should be a better way to handle this.
15683          This covers for the fact that bfd_install_relocation will
15684          subtract the current location (for partial_inplace, PC relative
15685          relocations); see more below.  */
15686 #ifndef OBJ_AOUT
15687       if (IS_ELF
15688 #ifdef TE_PE
15689           || OUTPUT_FLAVOR == bfd_target_coff_flavour
15690 #endif
15691           )
15692         value += fixP->fx_where + fixP->fx_frag->fr_address;
15693 #endif
15694 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15695       if (IS_ELF)
15696         {
15697           segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
15698
15699           if ((sym_seg == seg
15700                || (symbol_section_p (fixP->fx_addsy)
15701                    && sym_seg != absolute_section))
15702               && !generic_force_reloc (fixP))
15703             {
15704               /* Yes, we add the values in twice.  This is because
15705                  bfd_install_relocation subtracts them out again.  I think
15706                  bfd_install_relocation is broken, but I don't dare change
15707                  it.  FIXME.  */
15708               value += fixP->fx_where + fixP->fx_frag->fr_address;
15709             }
15710         }
15711 #endif
15712 #if defined (OBJ_COFF) && defined (TE_PE)
15713       /* For some reason, the PE format does not store a
15714          section address offset for a PC relative symbol.  */
15715       if (S_GET_SEGMENT (fixP->fx_addsy) != seg
15716           || S_IS_WEAK (fixP->fx_addsy))
15717         value += md_pcrel_from (fixP);
15718 #endif
15719     }
15720 #if defined (OBJ_COFF) && defined (TE_PE)
15721   if (fixP->fx_addsy != NULL
15722       && S_IS_WEAK (fixP->fx_addsy)
15723       /* PR 16858: Do not modify weak function references.  */
15724       && ! fixP->fx_pcrel)
15725     {
15726 #if !defined (TE_PEP)
15727       /* For x86 PE weak function symbols are neither PC-relative
15728          nor do they set S_IS_FUNCTION.  So the only reliable way
15729          to detect them is to check the flags of their containing
15730          section.  */
15731       if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
15732           && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
15733         ;
15734       else
15735 #endif
15736       value -= S_GET_VALUE (fixP->fx_addsy);
15737     }
15738 #endif
15739
15740   /* Fix a few things - the dynamic linker expects certain values here,
15741      and we must not disappoint it.  */
15742 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15743   if (IS_ELF && fixP->fx_addsy)
15744     switch (fixP->fx_r_type)
15745       {
15746       case BFD_RELOC_386_PLT32:
15747       case BFD_RELOC_X86_64_PLT32:
15748         /* Make the jump instruction point to the address of the operand.
15749            At runtime we merely add the offset to the actual PLT entry.
15750            NB: Subtract the offset size only for jump instructions.  */
15751         if (fixP->fx_pcrel)
15752           value = -4;
15753         break;
15754
15755       case BFD_RELOC_386_TLS_GD:
15756       case BFD_RELOC_386_TLS_LDM:
15757       case BFD_RELOC_386_TLS_IE_32:
15758       case BFD_RELOC_386_TLS_IE:
15759       case BFD_RELOC_386_TLS_GOTIE:
15760       case BFD_RELOC_386_TLS_GOTDESC:
15761       case BFD_RELOC_X86_64_TLSGD:
15762       case BFD_RELOC_X86_64_TLSLD:
15763       case BFD_RELOC_X86_64_GOTTPOFF:
15764       case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
15765       case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
15766       case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
15767       case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
15768         value = 0; /* Fully resolved at runtime.  No addend.  */
15769         /* Fallthrough */
15770       case BFD_RELOC_386_TLS_LE:
15771       case BFD_RELOC_386_TLS_LDO_32:
15772       case BFD_RELOC_386_TLS_LE_32:
15773       case BFD_RELOC_X86_64_DTPOFF32:
15774       case BFD_RELOC_X86_64_DTPOFF64:
15775       case BFD_RELOC_X86_64_TPOFF32:
15776       case BFD_RELOC_X86_64_TPOFF64:
15777         S_SET_THREAD_LOCAL (fixP->fx_addsy);
15778         break;
15779
15780       case BFD_RELOC_386_TLS_DESC_CALL:
15781       case BFD_RELOC_X86_64_TLSDESC_CALL:
15782         value = 0; /* Fully resolved at runtime.  No addend.  */
15783         S_SET_THREAD_LOCAL (fixP->fx_addsy);
15784         fixP->fx_done = 0;
15785         return;
15786
15787       case BFD_RELOC_VTABLE_INHERIT:
15788       case BFD_RELOC_VTABLE_ENTRY:
15789         fixP->fx_done = 0;
15790         return;
15791
15792       default:
15793         break;
15794       }
15795 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)  */
15796
15797   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
15798   if (!object_64bit)
15799     value = extend_to_32bit_address (value);
15800
15801   *valP = value;
15802 #endif /* !defined (TE_Mach)  */
15803
15804   /* Are we finished with this relocation now?  */
15805   if (fixP->fx_addsy == NULL)
15806     {
15807       fixP->fx_done = 1;
15808       switch (fixP->fx_r_type)
15809         {
15810         case BFD_RELOC_X86_64_32S:
15811           fixP->fx_signed = 1;
15812           break;
15813
15814         default:
15815           break;
15816         }
15817     }
15818 #if defined (OBJ_COFF) && defined (TE_PE)
15819   else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
15820     {
15821       fixP->fx_done = 0;
15822       /* Remember value for tc_gen_reloc.  */
15823       fixP->fx_addnumber = value;
15824       /* Clear out the frag for now.  */
15825       value = 0;
15826     }
15827 #endif
15828   else if (use_rela_relocations)
15829     {
15830       if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
15831         fixP->fx_no_overflow = 1;
15832       /* Remember value for tc_gen_reloc.  */
15833       fixP->fx_addnumber = value;
15834       value = 0;
15835     }
15836
15837   md_number_to_chars (p, value, fixP->fx_size);
15838 }
15839 \f
const char *
md_atof (int type, char *litP, int *sizeP)
{
  /* All the work is delegated to ieee_md_atof, whose result is handed
     back unchanged.  NOTE(review): the trailing `false' argument is
     understood to request the reversed (least significant first)
     LITTLENUM order that x86 needs -- confirm against
     ieee_md_atof.  */
  const char *result = ieee_md_atof (type, litP, sizeP, false);

  return result;
}
15847 \f
/* Scratch buffer shared by all calls of output_invalid; every call
   overwrites the previous contents.  */
static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];

/* Format character C for use in a diagnostic: printable characters
   are shown quoted ('c'), anything else as its hexadecimal value in
   parentheses.  Returns output_invalid_buf.  */

static char *
output_invalid (int c)
{
  const size_t room = sizeof (output_invalid_buf);

  if (!ISPRINT (c))
    snprintf (output_invalid_buf, room, "(0x%x)", (unsigned char) c);
  else
    snprintf (output_invalid_buf, room, "'%c'", c);

  return output_invalid_buf;
}
15861
15862 /* Verify that @r can be used in the current context.  */
15863
15864 static bool check_register (const reg_entry *r)
15865 {
15866   if (allow_pseudo_reg)
15867     return true;
15868
15869   if (operand_type_all_zero (&r->reg_type))
15870     return false;
15871
15872   if ((r->reg_type.bitfield.dword
15873        || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
15874        || r->reg_type.bitfield.class == RegCR
15875        || r->reg_type.bitfield.class == RegDR)
15876       && !cpu_arch_flags.bitfield.cpui386)
15877     return false;
15878
15879   if (r->reg_type.bitfield.class == RegTR
15880       && (flag_code == CODE_64BIT
15881           || !cpu_arch_flags.bitfield.cpui386
15882           || cpu_arch_isa_flags.bitfield.cpui586
15883           || cpu_arch_isa_flags.bitfield.cpui686))
15884     return false;
15885
15886   if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
15887     return false;
15888
15889   if (!cpu_arch_flags.bitfield.cpuavx512f)
15890     {
15891       if (r->reg_type.bitfield.zmmword
15892           || r->reg_type.bitfield.class == RegMask)
15893         return false;
15894
15895       if (!cpu_arch_flags.bitfield.cpuavx)
15896         {
15897           if (r->reg_type.bitfield.ymmword)
15898             return false;
15899
15900           if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
15901             return false;
15902         }
15903     }
15904
15905   if (r->reg_type.bitfield.zmmword)
15906     {
15907       if (vector_size < VSZ512)
15908         return false;
15909
15910       switch (i.encoding)
15911         {
15912         case encoding_default:
15913         case encoding_egpr:
15914           i.encoding = encoding_evex512;
15915           break;
15916         case encoding_evex:
15917         case encoding_evex512:
15918           break;
15919         default:
15920           i.encoding = encoding_error;
15921           break;
15922         }
15923     }
15924
15925   if (vector_size < VSZ256 && r->reg_type.bitfield.ymmword)
15926     return false;
15927
15928   if (r->reg_type.bitfield.tmmword
15929       && (!cpu_arch_flags.bitfield.cpuamx_tile
15930           || flag_code != CODE_64BIT))
15931     return false;
15932
15933   if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
15934     return false;
15935
15936   /* Don't allow fake index register unless allow_index_reg isn't 0. */
15937   if (!allow_index_reg && r->reg_num == RegIZ)
15938     return false;
15939
15940   /* Upper 16 vector registers are only available with VREX in 64bit
15941      mode, and require EVEX encoding.  */
15942   if (r->reg_flags & RegVRex)
15943     {
15944       if (!cpu_arch_flags.bitfield.cpuavx512f
15945           || flag_code != CODE_64BIT)
15946         return false;
15947
15948       switch (i.encoding)
15949         {
15950           case encoding_default:
15951           case encoding_egpr:
15952           case encoding_evex512:
15953             i.encoding = encoding_evex;
15954             break;
15955           case encoding_evex:
15956             break;
15957           default:
15958             i.encoding = encoding_error;
15959             break;
15960         }
15961     }
15962
15963   if (r->reg_flags & RegRex2)
15964     {
15965       if (!cpu_arch_flags.bitfield.cpuapx_f
15966           || flag_code != CODE_64BIT)
15967         return false;
15968
15969       switch (i.encoding)
15970         {
15971         case encoding_default:
15972           i.encoding = encoding_egpr;
15973           break;
15974         case encoding_egpr:
15975         case encoding_evex:
15976         case encoding_evex512:
15977           break;
15978         default:
15979           i.encoding = encoding_error;
15980           break;
15981         }
15982     }
15983
15984   if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
15985       && (!cpu_arch_flags.bitfield.cpu64
15986           || r->reg_type.bitfield.class != RegCR
15987           || dot_insn ())
15988       && flag_code != CODE_64BIT)
15989     return false;
15990
15991   if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
15992       && !intel_syntax)
15993     return false;
15994
15995   return true;
15996 }
15997
15998 /* REG_STRING starts *before* REGISTER_PREFIX.  */
15999
16000 static const reg_entry *
16001 parse_real_register (const char *reg_string, char **end_op)
16002 {
16003   const char *s = reg_string;
16004   char *p;
16005   char reg_name_given[MAX_REG_NAME_SIZE + 1];
16006   const reg_entry *r;
16007
16008   /* Skip possible REGISTER_PREFIX and possible whitespace.  */
16009   if (*s == REGISTER_PREFIX)
16010     ++s;
16011
16012   if (is_space_char (*s))
16013     ++s;
16014
16015   p = reg_name_given;
16016   while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
16017     {
16018       if (p >= reg_name_given + MAX_REG_NAME_SIZE)
16019         return (const reg_entry *) NULL;
16020       s++;
16021     }
16022
16023   if (is_part_of_name (*s))
16024     return (const reg_entry *) NULL;
16025
16026   *end_op = (char *) s;
16027
16028   r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
16029
16030   /* Handle floating point regs, allowing spaces in the (i) part.  */
16031   if (r == reg_st0)
16032     {
16033       if (!cpu_arch_flags.bitfield.cpu8087
16034           && !cpu_arch_flags.bitfield.cpu287
16035           && !cpu_arch_flags.bitfield.cpu387
16036           && !allow_pseudo_reg)
16037         return (const reg_entry *) NULL;
16038
16039       if (is_space_char (*s))
16040         ++s;
16041       if (*s == '(')
16042         {
16043           ++s;
16044           if (is_space_char (*s))
16045             ++s;
16046           if (*s >= '0' && *s <= '7')
16047             {
16048               int fpr = *s - '0';
16049               ++s;
16050               if (is_space_char (*s))
16051                 ++s;
16052               if (*s == ')')
16053                 {
16054                   *end_op = (char *) s + 1;
16055                   know (r[fpr].reg_num == fpr);
16056                   return r + fpr;
16057                 }
16058             }
16059           /* We have "%st(" then garbage.  */
16060           return (const reg_entry *) NULL;
16061         }
16062     }
16063
16064   return r && check_register (r) ? r : NULL;
16065 }
16066
16067 /* REG_STRING starts *before* REGISTER_PREFIX.  */
16068
16069 static const reg_entry *
16070 parse_register (const char *reg_string, char **end_op)
16071 {
16072   const reg_entry *r;
16073
16074   if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
16075     r = parse_real_register (reg_string, end_op);
16076   else
16077     r = NULL;
16078   if (!r)
16079     {
16080       char *save = input_line_pointer;
16081       char *buf = xstrdup (reg_string), *name;
16082       symbolS *symbolP;
16083
16084       input_line_pointer = buf;
16085       get_symbol_name (&name);
16086       symbolP = symbol_find (name);
16087       while (symbolP && symbol_equated_p (symbolP))
16088         {
16089           const expressionS *e = symbol_get_value_expression(symbolP);
16090
16091           if (e->X_add_number)
16092             break;
16093           symbolP = e->X_add_symbol;
16094         }
16095       if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
16096         {
16097           const expressionS *e = symbol_get_value_expression (symbolP);
16098
16099           if (e->X_op == O_register)
16100             {
16101               know (e->X_add_number >= 0
16102                     && (valueT) e->X_add_number < i386_regtab_size);
16103               r = i386_regtab + e->X_add_number;
16104               *end_op = (char *) reg_string + (input_line_pointer - buf);
16105             }
16106           if (r && !check_register (r))
16107             {
16108               as_bad (_("register '%s%s' cannot be used here"),
16109                       register_prefix, r->reg_name);
16110               r = &bad_reg;
16111             }
16112         }
16113       input_line_pointer = save;
16114       free (buf);
16115     }
16116   return r;
16117 }
16118
16119 int
16120 i386_parse_name (char *name, expressionS *e, char *nextcharP)
16121 {
16122   const reg_entry *r = NULL;
16123   char *end = input_line_pointer;
16124
16125   /* We only know the terminating character here.  It being double quote could
16126      be the closing one of a quoted symbol name, or an opening one from a
16127      following string (or another quoted symbol name).  Since the latter can't
16128      be valid syntax for anything, bailing in either case is good enough.  */
16129   if (*nextcharP == '"')
16130     return 0;
16131
16132   *end = *nextcharP;
16133   if (*name == REGISTER_PREFIX || allow_naked_reg)
16134     r = parse_real_register (name, &input_line_pointer);
16135   if (r && end <= input_line_pointer)
16136     {
16137       *nextcharP = *input_line_pointer;
16138       *input_line_pointer = 0;
16139       e->X_op = O_register;
16140       e->X_add_number = r - i386_regtab;
16141       return 1;
16142     }
16143   input_line_pointer = end;
16144   *end = 0;
16145   return intel_syntax ? i386_intel_parse_name (name, e) : 0;
16146 }
16147
16148 void
16149 md_operand (expressionS *e)
16150 {
16151   char *end;
16152   const reg_entry *r;
16153
16154   switch (*input_line_pointer)
16155     {
16156     case REGISTER_PREFIX:
16157       r = parse_real_register (input_line_pointer, &end);
16158       if (r)
16159         {
16160           e->X_op = O_register;
16161           e->X_add_number = r - i386_regtab;
16162           input_line_pointer = end;
16163         }
16164       break;
16165
16166     case '[':
16167       gas_assert (intel_syntax);
16168       end = input_line_pointer++;
16169       expression (e);
16170       if (*input_line_pointer == ']')
16171         {
16172           ++input_line_pointer;
16173           e->X_op_symbol = make_expr_symbol (e);
16174           e->X_add_symbol = NULL;
16175           e->X_add_number = 0;
16176           e->X_op = O_index;
16177         }
16178       else
16179         {
16180           e->X_op = O_absent;
16181           input_line_pointer = end;
16182         }
16183       break;
16184     }
16185 }
16186
#ifdef BFD64
/* Record in expr_mode whether any operator took part in an expression.
   The aim is consistency with !BFD64 builds of gas: those evaluate
   expressions in only 32 bits, so this state is later used to decide
   whether results need truncating.

   OP is the operator being applied, LEFT its left operand (NULL for a
   unary operator) and RIGHT its right operand.  Always returns false.  */
bool i386_record_operator (operatorT op,
                           const expressionS *left,
                           const expressionS *right)
{
  bool right_is_large;

  /* No operator, nothing to record.  */
  if (op == O_absent)
    return false;

  right_is_large = (right->X_op == O_constant
                    && right->X_unsigned
                    && !fits_in_unsigned_long (right->X_add_number));

  if (left == NULL)
    {
      /* Unary operator.  The expression parser applies these fine to
         bignum operands, so an operand not fitting in 32 bits is of no
         concern here and the mode is left as it was.  */
      if (right_is_large)
        return false;
    }
  else if (right_is_large
           || (left->X_op == O_constant
               && left->X_unsigned
               && !fits_in_unsigned_long (left->X_add_number)))
    {
      /* Binary operator with a wide unsigned constant on either side.
         This isn't entirely right: the same pattern can also result
         when constant expressions are folded (e.g. 0xffffffff + 1).  */
      expr_mode = expr_large_value;
    }

  /* A large value, once seen, takes precedence over the mere presence
     of an operator.  */
  if (expr_mode != expr_large_value)
    expr_mode = expr_operator_present;

  return false;
}
#endif
16222 \f
/* Single-letter options handled by md_parse_option, written in
   getopt(3) notation.  ELF-capable builds additionally accept the
   -k, -V, -Q and -s compatibility switches.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
const char *md_shortopts = "kVQ:sqnO::";
#else
const char *md_shortopts = "qnO::";
#endif
16228
/* Codes for the long options listed in md_longopts.  They are numbered
   consecutively starting at OPTION_MD_BASE, and md_parse_option
   switches on them.  */
#define OPTION_32 (OPTION_MD_BASE + 0)
#define OPTION_64 (OPTION_MD_BASE + 1)
#define OPTION_DIVIDE (OPTION_MD_BASE + 2)
#define OPTION_MARCH (OPTION_MD_BASE + 3)
#define OPTION_MTUNE (OPTION_MD_BASE + 4)
#define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
#define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
#define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
#define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
#define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
#define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
#define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
#define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
#define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
#define OPTION_X32 (OPTION_MD_BASE + 14)
#define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
#define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
#define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
#define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
#define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
#define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
#define OPTION_MSHARED (OPTION_MD_BASE + 21)
#define OPTION_MAMD64 (OPTION_MD_BASE + 22)
#define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
#define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
#define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
#define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
#define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
#define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
#define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
#define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
#define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
#define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
#define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
#define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
16264
/* Table of target-specific long options.  Each entry gives the option
   name, whether it takes an argument, and the OPTION_* code that
   md_parse_option receives for it.  Some entries only exist for
   particular object formats / target environments.  The table ends
   with an entry whose name is NULL.  */
struct option md_longopts[] =
{
  {"32", no_argument, NULL, OPTION_32},
#if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
  {"64", no_argument, NULL, OPTION_64},
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"x32", no_argument, NULL, OPTION_X32},
  {"mshared", no_argument, NULL, OPTION_MSHARED},
  {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
#endif
  {"divide", no_argument, NULL, OPTION_DIVIDE},
  {"march", required_argument, NULL, OPTION_MARCH},
  {"mtune", required_argument, NULL, OPTION_MTUNE},
  {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
  {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
  {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
  {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
  {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
  {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
  {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
  {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
  {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
  {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
  {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
  {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
  {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
# if defined (TE_PE) || defined (TE_PEP)
  {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
#endif
  {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
  {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
  {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
  {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
  {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
  {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
  {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
  {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
  {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
  {"mlfence-before-indirect-branch", required_argument, NULL,
   OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
  {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
  {"mamd64", no_argument, NULL, OPTION_MAMD64},
  {"mintel64", no_argument, NULL, OPTION_MINTEL64},
  {NULL, no_argument, NULL, 0}
};
/* Size of the table above in bytes (not the number of entries).  */
size_t md_longopts_size = sizeof (md_longopts);
16313
16314 int
16315 md_parse_option (int c, const char *arg)
16316 {
16317   unsigned int j;
16318   char *arch, *next, *saved, *type;
16319
16320   switch (c)
16321     {
16322     case 'n':
16323       optimize_align_code = 0;
16324       break;
16325
16326     case 'q':
16327       quiet_warnings = 1;
16328       break;
16329
16330 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16331       /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
16332          should be emitted or not.  FIXME: Not implemented.  */
16333     case 'Q':
16334       if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
16335         return 0;
16336       break;
16337
16338       /* -V: SVR4 argument to print version ID.  */
16339     case 'V':
16340       print_version_id ();
16341       break;
16342
16343       /* -k: Ignore for FreeBSD compatibility.  */
16344     case 'k':
16345       break;
16346
16347     case 's':
16348       /* -s: On i386 Solaris, this tells the native assembler to use
16349          .stab instead of .stab.excl.  We always use .stab anyhow.  */
16350       break;
16351
16352     case OPTION_MSHARED:
16353       shared = 1;
16354       break;
16355
16356     case OPTION_X86_USED_NOTE:
16357       if (strcasecmp (arg, "yes") == 0)
16358         x86_used_note = 1;
16359       else if (strcasecmp (arg, "no") == 0)
16360         x86_used_note = 0;
16361       else
16362         as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
16363       break;
16364
16365
16366 #endif
16367 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
16368      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
16369     case OPTION_64:
16370       {
16371         const char **list, **l;
16372
16373         list = bfd_target_list ();
16374         for (l = list; *l != NULL; l++)
16375           if (startswith (*l, "elf64-x86-64")
16376               || strcmp (*l, "coff-x86-64") == 0
16377               || strcmp (*l, "pe-x86-64") == 0
16378               || strcmp (*l, "pei-x86-64") == 0
16379               || strcmp (*l, "mach-o-x86-64") == 0)
16380             {
16381               default_arch = "x86_64";
16382               break;
16383             }
16384         if (*l == NULL)
16385           as_fatal (_("no compiled in support for x86_64"));
16386         free (list);
16387       }
16388       break;
16389 #endif
16390
16391 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16392     case OPTION_X32:
16393       if (IS_ELF)
16394         {
16395           const char **list, **l;
16396
16397           list = bfd_target_list ();
16398           for (l = list; *l != NULL; l++)
16399             if (startswith (*l, "elf32-x86-64"))
16400               {
16401                 default_arch = "x86_64:32";
16402                 break;
16403               }
16404           if (*l == NULL)
16405             as_fatal (_("no compiled in support for 32bit x86_64"));
16406           free (list);
16407         }
16408       else
16409         as_fatal (_("32bit x86_64 is only supported for ELF"));
16410       break;
16411 #endif
16412
16413     case OPTION_32:
16414       {
16415         const char **list, **l;
16416
16417         list = bfd_target_list ();
16418         for (l = list; *l != NULL; l++)
16419           if (strstr (*l, "-i386")
16420               || strstr (*l, "-go32"))
16421             {
16422               default_arch = "i386";
16423               break;
16424             }
16425         if (*l == NULL)
16426           as_fatal (_("no compiled in support for ix86"));
16427         free (list);
16428       }
16429       break;
16430
16431     case OPTION_DIVIDE:
16432 #ifdef SVR4_COMMENT_CHARS
16433       {
16434         char *n, *t;
16435         const char *s;
16436
16437         n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
16438         t = n;
16439         for (s = i386_comment_chars; *s != '\0'; s++)
16440           if (*s != '/')
16441             *t++ = *s;
16442         *t = '\0';
16443         i386_comment_chars = n;
16444       }
16445 #endif
16446       break;
16447
16448     case OPTION_MARCH:
16449       saved = xstrdup (arg);
16450       arch = saved;
16451       /* Allow -march=+nosse.  */
16452       if (*arch == '+')
16453         arch++;
16454       do
16455         {
16456           char *vsz;
16457
16458           if (*arch == '.')
16459             as_fatal (_("invalid -march= option: `%s'"), arg);
16460           next = strchr (arch, '+');
16461           if (next)
16462             *next++ = '\0';
16463           vsz = strchr (arch, '/');
16464           if (vsz)
16465             *vsz++ = '\0';
16466           for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16467             {
16468               if (vsz && cpu_arch[j].vsz != vsz_set)
16469                 continue;
16470
16471               if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
16472                   && strcmp (arch, cpu_arch[j].name) == 0)
16473                 {
16474                   /* Processor.  */
16475                   if (! cpu_arch[j].enable.bitfield.cpui386)
16476                     continue;
16477
16478                   cpu_arch_name = cpu_arch[j].name;
16479                   free (cpu_sub_arch_name);
16480                   cpu_sub_arch_name = NULL;
16481                   cpu_arch_flags = cpu_arch[j].enable;
16482                   cpu_arch_isa = cpu_arch[j].type;
16483                   cpu_arch_isa_flags = cpu_arch[j].enable;
16484                   if (!cpu_arch_tune_set)
16485                     cpu_arch_tune = cpu_arch_isa;
16486                   vector_size = VSZ_DEFAULT;
16487                   break;
16488                 }
16489               else if (cpu_arch[j].type == PROCESSOR_NONE
16490                        && strcmp (arch, cpu_arch[j].name) == 0
16491                        && !cpu_flags_all_zero (&cpu_arch[j].enable))
16492                 {
16493                   /* ISA extension.  */
16494                   isa_enable (j);
16495
16496                   switch (cpu_arch[j].vsz)
16497                     {
16498                     default:
16499                       break;
16500
16501                     case vsz_set:
16502                       if (vsz)
16503                         {
16504                           char *end;
16505                           unsigned long val = strtoul (vsz, &end, 0);
16506
16507                           if (*end)
16508                             val = 0;
16509                           switch (val)
16510                             {
16511                             case 512: vector_size = VSZ512; break;
16512                             case 256: vector_size = VSZ256; break;
16513                             case 128: vector_size = VSZ128; break;
16514                             default:
16515                               as_warn (_("Unrecognized vector size specifier ignored"));
16516                               break;
16517                             }
16518                           break;
16519                         }
16520                         /* Fall through.  */
16521                     case vsz_reset:
16522                       vector_size = VSZ_DEFAULT;
16523                       break;
16524                     }
16525
16526                   break;
16527                 }
16528             }
16529
16530           if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
16531             {
16532               /* Disable an ISA extension.  */
16533               for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16534                 if (cpu_arch[j].type == PROCESSOR_NONE
16535                     && strcmp (arch + 2, cpu_arch[j].name) == 0)
16536                   {
16537                     isa_disable (j);
16538                     if (cpu_arch[j].vsz == vsz_set)
16539                       vector_size = VSZ_DEFAULT;
16540                     break;
16541                   }
16542             }
16543
16544           if (j >= ARRAY_SIZE (cpu_arch))
16545             as_fatal (_("invalid -march= option: `%s'"), arg);
16546
16547           arch = next;
16548         }
16549       while (next != NULL);
16550       free (saved);
16551       break;
16552
16553     case OPTION_MTUNE:
16554       if (*arg == '.')
16555         as_fatal (_("invalid -mtune= option: `%s'"), arg);
16556       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16557         {
16558           if (cpu_arch[j].type != PROCESSOR_NONE
16559               && strcmp (arg, cpu_arch[j].name) == 0)
16560             {
16561               cpu_arch_tune_set = 1;
16562               cpu_arch_tune = cpu_arch [j].type;
16563               break;
16564             }
16565         }
16566       if (j >= ARRAY_SIZE (cpu_arch))
16567         as_fatal (_("invalid -mtune= option: `%s'"), arg);
16568       break;
16569
16570     case OPTION_MMNEMONIC:
16571       if (strcasecmp (arg, "att") == 0)
16572         intel_mnemonic = 0;
16573       else if (strcasecmp (arg, "intel") == 0)
16574         intel_mnemonic = 1;
16575       else
16576         as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
16577       break;
16578
16579     case OPTION_MSYNTAX:
16580       if (strcasecmp (arg, "att") == 0)
16581         _set_intel_syntax (0);
16582       else if (strcasecmp (arg, "intel") == 0)
16583         _set_intel_syntax (1);
16584       else
16585         as_fatal (_("invalid -msyntax= option: `%s'"), arg);
16586       break;
16587
16588     case OPTION_MINDEX_REG:
16589       allow_index_reg = 1;
16590       break;
16591
16592     case OPTION_MNAKED_REG:
16593       allow_naked_reg = 1;
16594       register_prefix = "";
16595       break;
16596
16597     case OPTION_MSSE2AVX:
16598       sse2avx = 1;
16599       break;
16600
16601     case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
16602       use_unaligned_vector_move = 1;
16603       break;
16604
16605     case OPTION_MSSE_CHECK:
16606       if (strcasecmp (arg, "error") == 0)
16607         sse_check = check_error;
16608       else if (strcasecmp (arg, "warning") == 0)
16609         sse_check = check_warning;
16610       else if (strcasecmp (arg, "none") == 0)
16611         sse_check = check_none;
16612       else
16613         as_fatal (_("invalid -msse-check= option: `%s'"), arg);
16614       break;
16615
16616     case OPTION_MOPERAND_CHECK:
16617       if (strcasecmp (arg, "error") == 0)
16618         operand_check = check_error;
16619       else if (strcasecmp (arg, "warning") == 0)
16620         operand_check = check_warning;
16621       else if (strcasecmp (arg, "none") == 0)
16622         operand_check = check_none;
16623       else
16624         as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
16625       break;
16626
16627     case OPTION_MAVXSCALAR:
16628       if (strcasecmp (arg, "128") == 0)
16629         avxscalar = vex128;
16630       else if (strcasecmp (arg, "256") == 0)
16631         avxscalar = vex256;
16632       else
16633         as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
16634       break;
16635
16636     case OPTION_MVEXWIG:
16637       if (strcmp (arg, "0") == 0)
16638         vexwig = vexw0;
16639       else if (strcmp (arg, "1") == 0)
16640         vexwig = vexw1;
16641       else
16642         as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
16643       break;
16644
16645     case OPTION_MADD_BND_PREFIX:
16646       add_bnd_prefix = 1;
16647       break;
16648
16649     case OPTION_MEVEXLIG:
16650       if (strcmp (arg, "128") == 0)
16651         evexlig = evexl128;
16652       else if (strcmp (arg, "256") == 0)
16653         evexlig = evexl256;
16654       else  if (strcmp (arg, "512") == 0)
16655         evexlig = evexl512;
16656       else
16657         as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
16658       break;
16659
16660     case OPTION_MEVEXRCIG:
16661       if (strcmp (arg, "rne") == 0)
16662         evexrcig = rne;
16663       else if (strcmp (arg, "rd") == 0)
16664         evexrcig = rd;
16665       else if (strcmp (arg, "ru") == 0)
16666         evexrcig = ru;
16667       else if (strcmp (arg, "rz") == 0)
16668         evexrcig = rz;
16669       else
16670         as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
16671       break;
16672
16673     case OPTION_MEVEXWIG:
16674       if (strcmp (arg, "0") == 0)
16675         evexwig = evexw0;
16676       else if (strcmp (arg, "1") == 0)
16677         evexwig = evexw1;
16678       else
16679         as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
16680       break;
16681
16682 # if defined (TE_PE) || defined (TE_PEP)
16683     case OPTION_MBIG_OBJ:
16684       use_big_obj = 1;
16685       break;
16686 #endif
16687
16688     case OPTION_MOMIT_LOCK_PREFIX:
16689       if (strcasecmp (arg, "yes") == 0)
16690         omit_lock_prefix = 1;
16691       else if (strcasecmp (arg, "no") == 0)
16692         omit_lock_prefix = 0;
16693       else
16694         as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
16695       break;
16696
16697     case OPTION_MFENCE_AS_LOCK_ADD:
16698       if (strcasecmp (arg, "yes") == 0)
16699         avoid_fence = 1;
16700       else if (strcasecmp (arg, "no") == 0)
16701         avoid_fence = 0;
16702       else
16703         as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
16704       break;
16705
16706     case OPTION_MLFENCE_AFTER_LOAD:
16707       if (strcasecmp (arg, "yes") == 0)
16708         lfence_after_load = 1;
16709       else if (strcasecmp (arg, "no") == 0)
16710         lfence_after_load = 0;
16711       else
16712         as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
16713       break;
16714
16715     case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
16716       if (strcasecmp (arg, "all") == 0)
16717         {
16718           lfence_before_indirect_branch = lfence_branch_all;
16719           if (lfence_before_ret == lfence_before_ret_none)
16720             lfence_before_ret = lfence_before_ret_shl;
16721         }
16722       else if (strcasecmp (arg, "memory") == 0)
16723         lfence_before_indirect_branch = lfence_branch_memory;
16724       else if (strcasecmp (arg, "register") == 0)
16725         lfence_before_indirect_branch = lfence_branch_register;
16726       else if (strcasecmp (arg, "none") == 0)
16727         lfence_before_indirect_branch = lfence_branch_none;
16728       else
16729         as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
16730                   arg);
16731       break;
16732
16733     case OPTION_MLFENCE_BEFORE_RET:
16734       if (strcasecmp (arg, "or") == 0)
16735         lfence_before_ret = lfence_before_ret_or;
16736       else if (strcasecmp (arg, "not") == 0)
16737         lfence_before_ret = lfence_before_ret_not;
16738       else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
16739         lfence_before_ret = lfence_before_ret_shl;
16740       else if (strcasecmp (arg, "none") == 0)
16741         lfence_before_ret = lfence_before_ret_none;
16742       else
16743         as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
16744                   arg);
16745       break;
16746
16747     case OPTION_MRELAX_RELOCATIONS:
16748       if (strcasecmp (arg, "yes") == 0)
16749         generate_relax_relocations = 1;
16750       else if (strcasecmp (arg, "no") == 0)
16751         generate_relax_relocations = 0;
16752       else
16753         as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
16754       break;
16755
16756     case OPTION_MALIGN_BRANCH_BOUNDARY:
16757       {
16758         char *end;
16759         long int align = strtoul (arg, &end, 0);
16760         if (*end == '\0')
16761           {
16762             if (align == 0)
16763               {
16764                 align_branch_power = 0;
16765                 break;
16766               }
16767             else if (align >= 16)
16768               {
16769                 int align_power;
16770                 for (align_power = 0;
16771                      (align & 1) == 0;
16772                      align >>= 1, align_power++)
16773                   continue;
16774                 /* Limit alignment power to 31.  */
16775                 if (align == 1 && align_power < 32)
16776                   {
16777                     align_branch_power = align_power;
16778                     break;
16779                   }
16780               }
16781           }
16782         as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
16783       }
16784       break;
16785
16786     case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
16787       {
16788         char *end;
16789         int align = strtoul (arg, &end, 0);
16790         /* Some processors only support 5 prefixes.  */
16791         if (*end == '\0' && align >= 0 && align < 6)
16792           {
16793             align_branch_prefix_size = align;
16794             break;
16795           }
16796         as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
16797                   arg);
16798       }
16799       break;
16800
16801     case OPTION_MALIGN_BRANCH:
16802       align_branch = 0;
16803       saved = xstrdup (arg);
16804       type = saved;
16805       do
16806         {
16807           next = strchr (type, '+');
16808           if (next)
16809             *next++ = '\0';
16810           if (strcasecmp (type, "jcc") == 0)
16811             align_branch |= align_branch_jcc_bit;
16812           else if (strcasecmp (type, "fused") == 0)
16813             align_branch |= align_branch_fused_bit;
16814           else if (strcasecmp (type, "jmp") == 0)
16815             align_branch |= align_branch_jmp_bit;
16816           else if (strcasecmp (type, "call") == 0)
16817             align_branch |= align_branch_call_bit;
16818           else if (strcasecmp (type, "ret") == 0)
16819             align_branch |= align_branch_ret_bit;
16820           else if (strcasecmp (type, "indirect") == 0)
16821             align_branch |= align_branch_indirect_bit;
16822           else
16823             as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
16824           type = next;
16825         }
16826       while (next != NULL);
16827       free (saved);
16828       break;
16829
16830     case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
16831       align_branch_power = 5;
16832       align_branch_prefix_size = 5;
16833       align_branch = (align_branch_jcc_bit
16834                       | align_branch_fused_bit
16835                       | align_branch_jmp_bit);
16836       break;
16837
16838     case OPTION_MAMD64:
16839       isa64 = amd64;
16840       break;
16841
16842     case OPTION_MINTEL64:
16843       isa64 = intel64;
16844       break;
16845
16846     case 'O':
16847       if (arg == NULL)
16848         {
16849           optimize = 1;
16850           /* Turn off -Os.  */
16851           optimize_for_space = 0;
16852         }
16853       else if (*arg == 's')
16854         {
16855           optimize_for_space = 1;
16856           /* Turn on all encoding optimizations.  */
16857           optimize = INT_MAX;
16858         }
16859       else
16860         {
16861           optimize = atoi (arg);
16862           /* Turn off -Os.  */
16863           optimize_for_space = 0;
16864         }
16865       break;
16866
16867     default:
16868       return 0;
16869     }
16870   return 1;
16871 }
16872
16873 #define MESSAGE_TEMPLATE \
16874 "                                                                                "
16875
16876 static char *
16877 output_message (FILE *stream, char *p, char *message, char *start,
16878                 int *left_p, const char *name, int len)
16879 {
16880   int size = sizeof (MESSAGE_TEMPLATE);
16881   int left = *left_p;
16882
16883   /* Reserve 2 spaces for ", " or ",\0" */
16884   left -= len + 2;
16885
16886   /* Check if there is any room.  */
16887   if (left >= 0)
16888     {
16889       if (p != start)
16890         {
16891           *p++ = ',';
16892           *p++ = ' ';
16893         }
16894       p = mempcpy (p, name, len);
16895     }
16896   else
16897     {
16898       /* Output the current message now and start a new one.  */
16899       *p++ = ',';
16900       *p = '\0';
16901       fprintf (stream, "%s\n", message);
16902       p = start;
16903       left = size - (start - message) - len - 2;
16904
16905       gas_assert (left >= 0);
16906
16907       p = mempcpy (p, name, len);
16908     }
16909
16910   *left_p = left;
16911   return p;
16912 }
16913
16914 static void
16915 show_arch (FILE *stream, int ext, int check)
16916 {
16917   static char message[] = MESSAGE_TEMPLATE;
16918   char *start = message + 27;
16919   char *p;
16920   int size = sizeof (MESSAGE_TEMPLATE);
16921   int left;
16922   const char *name;
16923   int len;
16924   unsigned int j;
16925
16926   p = start;
16927   left = size - (start - message);
16928
16929   if (!ext && check)
16930     {
16931       p = output_message (stream, p, message, start, &left,
16932                           STRING_COMMA_LEN ("default"));
16933       p = output_message (stream, p, message, start, &left,
16934                           STRING_COMMA_LEN ("push"));
16935       p = output_message (stream, p, message, start, &left,
16936                           STRING_COMMA_LEN ("pop"));
16937     }
16938
16939   for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16940     {
16941       /* Should it be skipped?  */
16942       if (cpu_arch [j].skip)
16943         continue;
16944
16945       name = cpu_arch [j].name;
16946       len = cpu_arch [j].len;
16947       if (cpu_arch[j].type == PROCESSOR_NONE)
16948         {
16949           /* It is an extension.  Skip if we aren't asked to show it.  */
16950           if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
16951             continue;
16952         }
16953       else if (ext)
16954         {
16955           /* It is an processor.  Skip if we show only extension.  */
16956           continue;
16957         }
16958       else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
16959         {
16960           /* It is an impossible processor - skip.  */
16961           continue;
16962         }
16963
16964       p = output_message (stream, p, message, start, &left, name, len);
16965     }
16966
16967   /* Display disabled extensions.  */
16968   if (ext)
16969     for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16970       {
16971         char *str;
16972
16973         if (cpu_arch[j].type != PROCESSOR_NONE
16974             || !cpu_flags_all_zero (&cpu_arch[j].enable))
16975           continue;
16976         str = xasprintf ("no%s", cpu_arch[j].name);
16977         p = output_message (stream, p, message, start, &left, str,
16978                             strlen (str));
16979         free (str);
16980       }
16981
16982   *p = '\0';
16983   fprintf (stream, "%s\n", message);
16984 }
16985
16986 void
16987 md_show_usage (FILE *stream)
16988 {
16989 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16990   fprintf (stream, _("\
16991   -Qy, -Qn                ignored\n\
16992   -V                      print assembler version number\n\
16993   -k                      ignored\n"));
16994 #endif
16995   fprintf (stream, _("\
16996   -n                      do not optimize code alignment\n\
16997   -O{012s}                attempt some code optimizations\n\
16998   -q                      quieten some warnings\n"));
16999 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17000   fprintf (stream, _("\
17001   -s                      ignored\n"));
17002 #endif
17003 #ifdef BFD64
17004 # if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17005   fprintf (stream, _("\
17006   --32/--64/--x32         generate 32bit/64bit/x32 object\n"));
17007 # elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
17008   fprintf (stream, _("\
17009   --32/--64               generate 32bit/64bit object\n"));
17010 # endif
17011 #endif
17012 #ifdef SVR4_COMMENT_CHARS
17013   fprintf (stream, _("\
17014   --divide                do not treat `/' as a comment character\n"));
17015 #else
17016   fprintf (stream, _("\
17017   --divide                ignored\n"));
17018 #endif
17019   fprintf (stream, _("\
17020   -march=CPU[,+EXTENSION...]\n\
17021                           generate code for CPU and EXTENSION, CPU is one of:\n"));
17022   show_arch (stream, 0, 1);
17023   fprintf (stream, _("\
17024                           EXTENSION is combination of (possibly \"no\"-prefixed):\n"));
17025   show_arch (stream, 1, 0);
17026   fprintf (stream, _("\
17027   -mtune=CPU              optimize for CPU, CPU is one of:\n"));
17028   show_arch (stream, 0, 0);
17029   fprintf (stream, _("\
17030   -msse2avx               encode SSE instructions with VEX prefix\n"));
17031   fprintf (stream, _("\
17032   -muse-unaligned-vector-move\n\
17033                           encode aligned vector move as unaligned vector move\n"));
17034   fprintf (stream, _("\
17035   -msse-check=[none|error|warning] (default: none)\n\
17036                           check SSE instructions\n"));
17037   fprintf (stream, _("\
17038   -moperand-check=[none|error|warning] (default: warning)\n\
17039                           check operand combinations for validity\n"));
17040   fprintf (stream, _("\
17041   -mavxscalar=[128|256] (default: 128)\n\
17042                           encode scalar AVX instructions with specific vector\n\
17043                            length\n"));
17044   fprintf (stream, _("\
17045   -mvexwig=[0|1] (default: 0)\n\
17046                           encode VEX instructions with specific VEX.W value\n\
17047                            for VEX.W bit ignored instructions\n"));
17048   fprintf (stream, _("\
17049   -mevexlig=[128|256|512] (default: 128)\n\
17050                           encode scalar EVEX instructions with specific vector\n\
17051                            length\n"));
17052   fprintf (stream, _("\
17053   -mevexwig=[0|1] (default: 0)\n\
17054                           encode EVEX instructions with specific EVEX.W value\n\
17055                            for EVEX.W bit ignored instructions\n"));
17056   fprintf (stream, _("\
17057   -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
17058                           encode EVEX instructions with specific EVEX.RC value\n\
17059                            for SAE-only ignored instructions\n"));
17060   fprintf (stream, _("\
17061   -mmnemonic=[att|intel] "));
17062   if (SYSV386_COMPAT)
17063     fprintf (stream, _("(default: att)\n"));
17064   else
17065     fprintf (stream, _("(default: intel)\n"));
17066   fprintf (stream, _("\
17067                           use AT&T/Intel mnemonic (AT&T syntax only)\n"));
17068   fprintf (stream, _("\
17069   -msyntax=[att|intel] (default: att)\n\
17070                           use AT&T/Intel syntax\n"));
17071   fprintf (stream, _("\
17072   -mindex-reg             support pseudo index registers\n"));
17073   fprintf (stream, _("\
17074   -mnaked-reg             don't require `%%' prefix for registers\n"));
17075   fprintf (stream, _("\
17076   -madd-bnd-prefix        add BND prefix for all valid branches\n"));
17077 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17078   fprintf (stream, _("\
17079   -mshared                disable branch optimization for shared code\n"));
17080   fprintf (stream, _("\
17081   -mx86-used-note=[no|yes] "));
17082   if (DEFAULT_X86_USED_NOTE)
17083     fprintf (stream, _("(default: yes)\n"));
17084   else
17085     fprintf (stream, _("(default: no)\n"));
17086   fprintf (stream, _("\
17087                           generate x86 used ISA and feature properties\n"));
17088 #endif
17089 #if defined (TE_PE) || defined (TE_PEP)
17090   fprintf (stream, _("\
17091   -mbig-obj               generate big object files\n"));
17092 #endif
17093   fprintf (stream, _("\
17094   -momit-lock-prefix=[no|yes] (default: no)\n\
17095                           strip all lock prefixes\n"));
17096   fprintf (stream, _("\
17097   -mfence-as-lock-add=[no|yes] (default: no)\n\
17098                           encode lfence, mfence and sfence as\n\
17099                            lock addl $0x0, (%%{re}sp)\n"));
17100   fprintf (stream, _("\
17101   -mrelax-relocations=[no|yes] "));
17102   if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
17103     fprintf (stream, _("(default: yes)\n"));
17104   else
17105     fprintf (stream, _("(default: no)\n"));
17106   fprintf (stream, _("\
17107                           generate relax relocations\n"));
17108   fprintf (stream, _("\
17109   -malign-branch-boundary=NUM (default: 0)\n\
17110                           align branches within NUM byte boundary\n"));
17111   fprintf (stream, _("\
17112   -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
17113                           TYPE is combination of jcc, fused, jmp, call, ret,\n\
17114                            indirect\n\
17115                           specify types of branches to align\n"));
17116   fprintf (stream, _("\
17117   -malign-branch-prefix-size=NUM (default: 5)\n\
17118                           align branches with NUM prefixes per instruction\n"));
17119   fprintf (stream, _("\
17120   -mbranches-within-32B-boundaries\n\
17121                           align branches within 32 byte boundary\n"));
17122   fprintf (stream, _("\
17123   -mlfence-after-load=[no|yes] (default: no)\n\
17124                           generate lfence after load\n"));
17125   fprintf (stream, _("\
17126   -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
17127                           generate lfence before indirect near branch\n"));
17128   fprintf (stream, _("\
17129   -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
17130                           generate lfence before ret\n"));
17131   fprintf (stream, _("\
17132   -mamd64                 accept only AMD64 ISA [default]\n"));
17133   fprintf (stream, _("\
17134   -mintel64               accept only Intel64 ISA\n"));
17135 }
17136
17137 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
17138      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
17139      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
17140
/* Pick the target format to use.

   Returns the BFD target name matching OUTPUT_FLAVOR.  Along the way
   this also settles global state derived from default_arch: the code
   size flag, the ELF ABI (when ELF is configured), cpu_arch_isa_flags
   if still all-zero, and, per output flavour, object_64bit,
   use_rela_relocations, disallow_64bit_reloc and tls_get_addr.
   Unknown or inconsistent architecture settings end in as_fatal; an
   output flavour with no matching case aborts.  */

const char *
i386_target_format (void)
{
  if (startswith (default_arch, "x86_64"))
    {
      update_code_flag (CODE_64BIT, 1);
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      /* Exactly "x86_64" selects the 64-bit ABI; anything following
         that prefix selects the x32 ABI.  */
      if (default_arch[6] == '\0')
        x86_elf_abi = X86_64_ABI;
      else
        x86_elf_abi = X86_64_X32_ABI;
#endif
    }
  else if (!strcmp (default_arch, "i386"))
    update_code_flag (CODE_32BIT, 1);
  else if (!strcmp (default_arch, "iamcu"))
    {
      update_code_flag (CODE_32BIT, 1);
      if (cpu_arch_isa == PROCESSOR_UNKNOWN)
        {
          /* No ISA chosen yet: install the IAMCU defaults.  */
          static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
          cpu_arch_name = "iamcu";
          free (cpu_sub_arch_name);
          cpu_sub_arch_name = NULL;
          cpu_arch_flags = iamcu_flags;
          cpu_arch_isa = PROCESSOR_IAMCU;
          cpu_arch_isa_flags = iamcu_flags;
          /* Leave an already-set tuning choice alone.  */
          if (!cpu_arch_tune_set)
            cpu_arch_tune = PROCESSOR_IAMCU;
        }
      else if (cpu_arch_isa != PROCESSOR_IAMCU)
        as_fatal (_("Intel MCU doesn't support `%s' architecture"),
                  cpu_arch_name);
    }
  else
    as_fatal (_("unknown architecture"));

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  /* Synthesized CFI is only accepted for the 64-bit ELF ABI.  */
  if (IS_ELF && flag_synth_cfi && x86_elf_abi != X86_64_ABI)
    as_fatal (_("SCFI is not supported for this ABI"));
#endif

  /* If no ISA flags have been set, fall back to the enable flags of
     cpu_arch[0] (non-64-bit code) or cpu_arch[1] (64-bit code).  */
  if (cpu_flags_all_zero (&cpu_arch_isa_flags))
    cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;

  switch (OUTPUT_FLAVOR)
    {
#if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
    case bfd_target_aout_flavour:
      return AOUT_TARGET_FORMAT;
#endif
#if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
# if defined (TE_PE) || defined (TE_PEP)
    case bfd_target_coff_flavour:
      /* PE: the target name depends on code size and on whether big
         object files were requested.  */
      if (flag_code == CODE_64BIT)
        {
          object_64bit = 1;
          return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
        }
      return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
# elif defined (TE_GO32)
    case bfd_target_coff_flavour:
      return "coff-go32";
# else
    case bfd_target_coff_flavour:
      return "coff-i386";
# endif
#endif
#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
    case bfd_target_elf_flavour:
      {
        const char *format;

        switch (x86_elf_abi)
          {
          default:
            /* Any ABI other than the two x86-64 ones below.  Note the
               three leading underscores in the TLS helper name.  */
            format = ELF_TARGET_FORMAT;
#ifndef TE_SOLARIS
            tls_get_addr = "___tls_get_addr";
#endif
            break;
          case X86_64_ABI:
            use_rela_relocations = 1;
            object_64bit = 1;
#ifndef TE_SOLARIS
            tls_get_addr = "__tls_get_addr";
#endif
            format = ELF_TARGET_FORMAT64;
            break;
          case X86_64_X32_ABI:
            use_rela_relocations = 1;
            object_64bit = 1;
#ifndef TE_SOLARIS
            tls_get_addr = "__tls_get_addr";
#endif
            /* x32 additionally turns on disallow_64bit_reloc.  */
            disallow_64bit_reloc = 1;
            format = ELF_TARGET_FORMAT32;
            break;
          }
        /* The IAMCU ISA overrides the format chosen above, and is
           only accepted together with I386_ABI.  */
        if (cpu_arch_isa == PROCESSOR_IAMCU)
          {
            if (x86_elf_abi != I386_ABI)
              as_fatal (_("Intel MCU is 32bit only"));
            return ELF_TARGET_IAMCU_FORMAT;
          }
        else
          return format;
      }
#endif
#if defined (OBJ_MACH_O)
    case bfd_target_mach_o_flavour:
      if (flag_code == CODE_64BIT)
        {
          use_rela_relocations = 1;
          object_64bit = 1;
          return "mach-o-x86-64";
        }
      else
        return "mach-o-i386";
#endif
    default:
      /* No configured flavour matched.  */
      abort ();
      return NULL;
    }
}
17268
17269 #endif /* OBJ_MAYBE_ more than one  */
17270 \f
17271 symbolS *
17272 md_undefined_symbol (char *name)
17273 {
17274   if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
17275       && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
17276       && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
17277       && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
17278     {
17279       if (!GOT_symbol)
17280         {
17281           if (symbol_find (name))
17282             as_bad (_("GOT already in symbol table"));
17283           GOT_symbol = symbol_new (name, undefined_section,
17284                                    &zero_address_frag, 0);
17285         };
17286       return GOT_symbol;
17287     }
17288   return 0;
17289 }
17290
17291 #if defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT)
17292 /* Round up a section size to the appropriate boundary.  */
17293
17294 valueT
17295 md_section_align (segT segment, valueT size)
17296 {
17297   if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
17298     {
17299       /* For a.out, force the section size to be aligned.  If we don't do
17300          this, BFD will align it for us, but it will not write out the
17301          final bytes of the section.  This may be a bug in BFD, but it is
17302          easier to fix it here since that is how the other a.out targets
17303          work.  */
17304       int align;
17305
17306       align = bfd_section_alignment (segment);
17307       size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
17308     }
17309
17310   return size;
17311 }
17312 #endif
17313
17314 /* On the i386, PC-relative offsets are relative to the start of the
17315    next instruction.  That is, the address of the offset, plus its
17316    size, since the offset is always the last part of the insn.  */
17317
17318 long
17319 md_pcrel_from (fixS *fixP)
17320 {
17321   return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
17322 }
17323
17324 #ifdef OBJ_AOUT
17325
17326 static void
17327 s_bss (int ignore ATTRIBUTE_UNUSED)
17328 {
17329   int temp;
17330
17331   temp = get_absolute_expression ();
17332   subseg_set (bss_section, (subsegT) temp);
17333   demand_empty_rest_of_line ();
17334 }
17335
17336 #endif
17337
17338 /* Remember constant directive.  */
17339
17340 void
17341 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
17342 {
17343   struct last_insn *last_insn
17344     = &seg_info(now_seg)->tc_segment_info_data.last_insn;
17345
17346   if (bfd_section_flags (now_seg) & SEC_CODE)
17347     {
17348       last_insn->kind = last_insn_directive;
17349       last_insn->name = "constant directive";
17350       last_insn->file = as_where (&last_insn->line);
17351     }
17352 }
17353
/* Check FIXP and finalize its relocation type.

   Returns 0 after diagnosing a relocation against a register.  For
   the size relocations (ELF configurations only) the return value is
   nonzero only when the output is ELF and the fixup's symbol is
   either undefined or external.  Every other fixup returns 1, after
   fx_r_type has possibly been rewritten as described in the comments
   below.
   NOTE(review): presumably a zero return tells the caller to drop the
   fixup -- confirm against the TC_VALIDATE_FIX user.  */
int
i386_validate_fix (fixS *fixp)
{
  if (fixp->fx_addsy && S_GET_SEGMENT(fixp->fx_addsy) == reg_section)
    {
      reloc_howto_type *howto;

      /* The howto is looked up only to name the relocation in the
         diagnostic.  */
      howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
      as_bad_where (fixp->fx_file, fixp->fx_line,
                    _("invalid %s relocation against register"),
                    howto ? howto->name : "<unknown>");
      return 0;
    }

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  if (fixp->fx_r_type == BFD_RELOC_SIZE32
      || fixp->fx_r_type == BFD_RELOC_SIZE64)
    return IS_ELF && fixp->fx_addsy
           && (!S_IS_DEFINED (fixp->fx_addsy)
               || S_IS_EXTERNAL (fixp->fx_addsy));

  /* The fx_tcbit* flags select a more specific relocation type:

     BFD_RELOC_X86_64_GOTTPOFF:
      1. fx_tcbit -> BFD_RELOC_X86_64_CODE_4_GOTTPOFF
      2. fx_tcbit2 -> BFD_RELOC_X86_64_CODE_6_GOTTPOFF
    BFD_RELOC_X86_64_GOTPC32_TLSDESC:
      1. fx_tcbit -> BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC
    BFD_RELOC_32_PCREL:
      1. fx_tcbit -> BFD_RELOC_X86_64_GOTPCRELX
      2. fx_tcbit2 -> BFD_RELOC_X86_64_REX_GOTPCRELX
      3. fx_tcbit3 -> BFD_RELOC_X86_64_CODE_4_GOTPCRELX
      4. else -> BFD_RELOC_X86_64_GOTPCREL
   */
  if (fixp->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF)
    {
      if (fixp->fx_tcbit)
        fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTTPOFF;
      else if (fixp->fx_tcbit2)
        fixp->fx_r_type = BFD_RELOC_X86_64_CODE_6_GOTTPOFF;
    }
  else if (fixp->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
           && fixp->fx_tcbit)
    fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC;
#endif

  if (fixp->fx_subsy)
    {
      /* Only a subtracted GOT symbol is handled here: the fixup is
         turned into a GOT-relative relocation and the subtrahend is
         dropped.  Other subtracted symbols are left untouched.  */
      if (fixp->fx_subsy == GOT_symbol)
        {
          if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
            {
              /* This combination is only expected for 64-bit
                 objects.  */
              if (!object_64bit)
                abort ();
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
              if (fixp->fx_tcbit)
                fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCRELX;
              else if (fixp->fx_tcbit2)
                fixp->fx_r_type = BFD_RELOC_X86_64_REX_GOTPCRELX;
              else if (fixp->fx_tcbit3)
                fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTPCRELX;
              else
#endif
                fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
            }
          else
            {
              if (!object_64bit)
                fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
              else
                fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
            }
          fixp->fx_subsy = 0;
        }
    }
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  else
    {
      /* NB: Commit 292676c1 resolved PLT32 reloc against local symbol
         to section.  Since PLT32 relocation must be against symbols,
         turn such PLT32 relocation into PC32 relocation.  */
      if (fixp->fx_addsy
          && (fixp->fx_r_type == BFD_RELOC_386_PLT32
              || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
          && symbol_section_p (fixp->fx_addsy))
        fixp->fx_r_type = BFD_RELOC_32_PCREL;
      if (!object_64bit)
        {
          /* For non-64-bit objects fx_tcbit2 upgrades GOT32 to
             GOT32X.  */
          if (fixp->fx_r_type == BFD_RELOC_386_GOT32
              && fixp->fx_tcbit2)
            fixp->fx_r_type = BFD_RELOC_386_GOT32X;
        }
    }
#endif

  return 1;
}
17449
17450 arelent *
17451 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
17452 {
17453   arelent *rel;
17454   bfd_reloc_code_real_type code;
17455
17456   switch (fixp->fx_r_type)
17457     {
17458 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17459       symbolS *sym;
17460
17461     case BFD_RELOC_SIZE32:
17462     case BFD_RELOC_SIZE64:
17463       if (fixp->fx_addsy
17464           && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
17465           && (!fixp->fx_subsy
17466               || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
17467         sym = fixp->fx_addsy;
17468       else if (fixp->fx_subsy
17469                && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
17470                && (!fixp->fx_addsy
17471                    || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
17472         sym = fixp->fx_subsy;
17473       else
17474         sym = NULL;
17475       if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
17476         {
17477           /* Resolve size relocation against local symbol to size of
17478              the symbol plus addend.  */
17479           valueT value = S_GET_SIZE (sym);
17480
17481           if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
17482             value = bfd_section_size (S_GET_SEGMENT (sym));
17483           if (sym == fixp->fx_subsy)
17484             {
17485               value = -value;
17486               if (fixp->fx_addsy)
17487                 value += S_GET_VALUE (fixp->fx_addsy);
17488             }
17489           else if (fixp->fx_subsy)
17490             value -= S_GET_VALUE (fixp->fx_subsy);
17491           value += fixp->fx_offset;
17492           if (fixp->fx_r_type == BFD_RELOC_SIZE32
17493               && object_64bit
17494               && !fits_in_unsigned_long (value))
17495             as_bad_where (fixp->fx_file, fixp->fx_line,
17496                           _("symbol size computation overflow"));
17497           fixp->fx_addsy = NULL;
17498           fixp->fx_subsy = NULL;
17499           md_apply_fix (fixp, (valueT *) &value, NULL);
17500           return NULL;
17501         }
17502       if (!fixp->fx_addsy || fixp->fx_subsy)
17503         {
17504           as_bad_where (fixp->fx_file, fixp->fx_line,
17505                         "unsupported expression involving @size");
17506           return NULL;
17507         }
17508 #endif
17509       /* Fall through.  */
17510
17511     case BFD_RELOC_X86_64_PLT32:
17512     case BFD_RELOC_X86_64_GOT32:
17513     case BFD_RELOC_X86_64_GOTPCREL:
17514     case BFD_RELOC_X86_64_GOTPCRELX:
17515     case BFD_RELOC_X86_64_REX_GOTPCRELX:
17516     case BFD_RELOC_X86_64_CODE_4_GOTPCRELX:
17517     case BFD_RELOC_386_PLT32:
17518     case BFD_RELOC_386_GOT32:
17519     case BFD_RELOC_386_GOT32X:
17520     case BFD_RELOC_386_GOTOFF:
17521     case BFD_RELOC_386_GOTPC:
17522     case BFD_RELOC_386_TLS_GD:
17523     case BFD_RELOC_386_TLS_LDM:
17524     case BFD_RELOC_386_TLS_LDO_32:
17525     case BFD_RELOC_386_TLS_IE_32:
17526     case BFD_RELOC_386_TLS_IE:
17527     case BFD_RELOC_386_TLS_GOTIE:
17528     case BFD_RELOC_386_TLS_LE_32:
17529     case BFD_RELOC_386_TLS_LE:
17530     case BFD_RELOC_386_TLS_GOTDESC:
17531     case BFD_RELOC_386_TLS_DESC_CALL:
17532     case BFD_RELOC_X86_64_TLSGD:
17533     case BFD_RELOC_X86_64_TLSLD:
17534     case BFD_RELOC_X86_64_DTPOFF32:
17535     case BFD_RELOC_X86_64_DTPOFF64:
17536     case BFD_RELOC_X86_64_GOTTPOFF:
17537     case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
17538     case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
17539     case BFD_RELOC_X86_64_TPOFF32:
17540     case BFD_RELOC_X86_64_TPOFF64:
17541     case BFD_RELOC_X86_64_GOTOFF64:
17542     case BFD_RELOC_X86_64_GOTPC32:
17543     case BFD_RELOC_X86_64_GOT64:
17544     case BFD_RELOC_X86_64_GOTPCREL64:
17545     case BFD_RELOC_X86_64_GOTPC64:
17546     case BFD_RELOC_X86_64_GOTPLT64:
17547     case BFD_RELOC_X86_64_PLTOFF64:
17548     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
17549     case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
17550     case BFD_RELOC_X86_64_TLSDESC_CALL:
17551     case BFD_RELOC_RVA:
17552     case BFD_RELOC_VTABLE_ENTRY:
17553     case BFD_RELOC_VTABLE_INHERIT:
17554 #ifdef TE_PE
17555     case BFD_RELOC_32_SECREL:
17556     case BFD_RELOC_16_SECIDX:
17557 #endif
17558       code = fixp->fx_r_type;
17559       break;
17560     case BFD_RELOC_X86_64_32S:
17561       if (!fixp->fx_pcrel)
17562         {
17563           /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
17564           code = fixp->fx_r_type;
17565           break;
17566         }
17567       /* Fall through.  */
17568     default:
17569       if (fixp->fx_pcrel)
17570         {
17571           switch (fixp->fx_size)
17572             {
17573             default:
17574               as_bad_where (fixp->fx_file, fixp->fx_line,
17575                             _("can not do %d byte pc-relative relocation"),
17576                             fixp->fx_size);
17577               code = BFD_RELOC_32_PCREL;
17578               break;
17579             case 1: code = BFD_RELOC_8_PCREL;  break;
17580             case 2: code = BFD_RELOC_16_PCREL; break;
17581             case 4: code = BFD_RELOC_32_PCREL; break;
17582 #ifdef BFD64
17583             case 8: code = BFD_RELOC_64_PCREL; break;
17584 #endif
17585             }
17586         }
17587       else
17588         {
17589           switch (fixp->fx_size)
17590             {
17591             default:
17592               as_bad_where (fixp->fx_file, fixp->fx_line,
17593                             _("can not do %d byte relocation"),
17594                             fixp->fx_size);
17595               code = BFD_RELOC_32;
17596               break;
17597             case 1: code = BFD_RELOC_8;  break;
17598             case 2: code = BFD_RELOC_16; break;
17599             case 4: code = BFD_RELOC_32; break;
17600 #ifdef BFD64
17601             case 8: code = BFD_RELOC_64; break;
17602 #endif
17603             }
17604         }
17605       break;
17606     }
17607
17608   if ((code == BFD_RELOC_32
17609        || code == BFD_RELOC_32_PCREL
17610        || code == BFD_RELOC_X86_64_32S)
17611       && GOT_symbol
17612       && fixp->fx_addsy == GOT_symbol)
17613     {
17614       if (!object_64bit)
17615         code = BFD_RELOC_386_GOTPC;
17616       else
17617         code = BFD_RELOC_X86_64_GOTPC32;
17618     }
17619   if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
17620       && GOT_symbol
17621       && fixp->fx_addsy == GOT_symbol)
17622     {
17623       code = BFD_RELOC_X86_64_GOTPC64;
17624     }
17625
17626   rel = XNEW (arelent);
17627   rel->sym_ptr_ptr = XNEW (asymbol *);
17628   *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
17629
17630   rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
17631
17632   if (!use_rela_relocations)
17633     {
17634       /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
17635          vtable entry to be used in the relocation's section offset.  */
17636       if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
17637         rel->address = fixp->fx_offset;
17638 #if defined (OBJ_COFF) && defined (TE_PE)
17639       else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
17640         rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
17641       else
17642 #endif
17643       rel->addend = 0;
17644     }
17645   /* Use the rela in 64bit mode.  */
17646   else
17647     {
17648       if (disallow_64bit_reloc)
17649         switch (code)
17650           {
17651           case BFD_RELOC_X86_64_DTPOFF64:
17652           case BFD_RELOC_X86_64_TPOFF64:
17653           case BFD_RELOC_64_PCREL:
17654           case BFD_RELOC_X86_64_GOTOFF64:
17655           case BFD_RELOC_X86_64_GOT64:
17656           case BFD_RELOC_X86_64_GOTPCREL64:
17657           case BFD_RELOC_X86_64_GOTPC64:
17658           case BFD_RELOC_X86_64_GOTPLT64:
17659           case BFD_RELOC_X86_64_PLTOFF64:
17660             as_bad_where (fixp->fx_file, fixp->fx_line,
17661                           _("cannot represent relocation type %s in x32 mode"),
17662                           bfd_get_reloc_code_name (code));
17663             break;
17664           default:
17665             break;
17666           }
17667
17668       if (!fixp->fx_pcrel)
17669         rel->addend = fixp->fx_offset;
17670       else
17671         switch (code)
17672           {
17673           case BFD_RELOC_X86_64_PLT32:
17674           case BFD_RELOC_X86_64_GOT32:
17675           case BFD_RELOC_X86_64_GOTPCREL:
17676           case BFD_RELOC_X86_64_GOTPCRELX:
17677           case BFD_RELOC_X86_64_REX_GOTPCRELX:
17678           case BFD_RELOC_X86_64_CODE_4_GOTPCRELX:
17679           case BFD_RELOC_X86_64_TLSGD:
17680           case BFD_RELOC_X86_64_TLSLD:
17681           case BFD_RELOC_X86_64_GOTTPOFF:
17682           case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
17683           case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
17684           case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
17685           case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
17686           case BFD_RELOC_X86_64_TLSDESC_CALL:
17687             rel->addend = fixp->fx_offset - fixp->fx_size;
17688             break;
17689           default:
17690             rel->addend = (section->vma
17691                            - fixp->fx_size
17692                            + fixp->fx_addnumber
17693                            + md_pcrel_from (fixp));
17694             break;
17695           }
17696     }
17697
17698   rel->howto = bfd_reloc_type_lookup (stdoutput, code);
17699   if (rel->howto == NULL)
17700     {
17701       as_bad_where (fixp->fx_file, fixp->fx_line,
17702                     _("cannot represent relocation type %s"),
17703                     bfd_get_reloc_code_name (code));
17704       /* Set howto to a garbage value so that we can keep going.  */
17705       rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
17706       gas_assert (rel->howto != NULL);
17707     }
17708
17709   return rel;
17710 }
17711
17712 #include "tc-i386-intel.c"
17713
17714 void
17715 tc_x86_parse_to_dw2regnum (expressionS *exp)
17716 {
17717   int saved_naked_reg;
17718   char saved_register_dot;
17719
17720   saved_naked_reg = allow_naked_reg;
17721   allow_naked_reg = 1;
17722   saved_register_dot = register_chars['.'];
17723   register_chars['.'] = '.';
17724   allow_pseudo_reg = 1;
17725   expression_and_evaluate (exp);
17726   allow_pseudo_reg = 0;
17727   register_chars['.'] = saved_register_dot;
17728   allow_naked_reg = saved_naked_reg;
17729
17730   if (exp->X_op == O_register && exp->X_add_number >= 0)
17731     {
17732       exp->X_op = O_illegal;
17733       if ((addressT) exp->X_add_number < i386_regtab_size)
17734         {
17735           exp->X_add_number = i386_regtab[exp->X_add_number]
17736                               .dw2_regnum[object_64bit];
17737           if (exp->X_add_number != Dw2Inval)
17738             exp->X_op = O_constant;
17739         }
17740     }
17741 }
17742
17743 void
17744 tc_x86_frame_initial_instructions (void)
17745 {
17746   cfi_add_CFA_def_cfa (object_64bit ? REG_SP : 4, -x86_cie_data_alignment);
17747   cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
17748 }
17749
17750 int
17751 x86_dwarf2_addr_size (void)
17752 {
17753 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
17754   if (x86_elf_abi == X86_64_X32_ABI)
17755     return 4;
17756 #endif
17757   return bfd_arch_bits_per_address (stdoutput) / 8;
17758 }
17759
17760 #ifdef TE_PE
17761 void
17762 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
17763 {
17764   expressionS exp;
17765
17766   exp.X_op = O_secrel;
17767   exp.X_add_symbol = symbol;
17768   exp.X_add_number = 0;
17769   emit_expr (&exp, size);
17770 }
17771 #endif
17772
17773 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17774 int
17775 i386_elf_section_type (const char *str, size_t len)
17776 {
17777   if (flag_code == CODE_64BIT
17778       && len == sizeof ("unwind") - 1
17779       && startswith (str, "unwind"))
17780     return SHT_X86_64_UNWIND;
17781
17782   return -1;
17783 }
17784
17785 void
17786 i386_elf_section_change_hook (void)
17787 {
17788   struct i386_segment_info *info = &seg_info(now_seg)->tc_segment_info_data;
17789   struct i386_segment_info *curr, *prev;
17790
17791   if (info->subseg == now_subseg)
17792     return;
17793
17794   /* Find the (or make a) list entry to save state into.  */
17795   for (prev = info; (curr = prev->next) != NULL; prev = curr)
17796     if (curr->subseg == info->subseg)
17797       break;
17798   if (!curr)
17799     {
17800       curr = notes_alloc (sizeof (*curr));
17801       curr->subseg = info->subseg;
17802       curr->next = NULL;
17803       prev->next = curr;
17804     }
17805   curr->last_insn = info->last_insn;
17806
17807   /* Find the list entry to load state from.  */
17808   for (curr = info->next; curr; curr = curr->next)
17809     if (curr->subseg == now_subseg)
17810       break;
17811   if (curr)
17812     info->last_insn = curr->last_insn;
17813   else
17814     memset (&info->last_insn, 0, sizeof (info->last_insn));
17815   info->subseg = now_subseg;
17816 }
17817
17818 #ifdef TE_SOLARIS
17819 void
17820 i386_solaris_fix_up_eh_frame (segT sec)
17821 {
17822   if (flag_code == CODE_64BIT)
17823     elf_section_type (sec) = SHT_X86_64_UNWIND;
17824 }
17825 #endif
17826
17827 /* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */
17828
17829 bfd_vma
17830 x86_64_section_letter (int letter, const char **ptr_msg)
17831 {
17832   if (flag_code == CODE_64BIT)
17833     {
17834       if (letter == 'l')
17835         return SHF_X86_64_LARGE;
17836
17837       *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
17838     }
17839   else
17840     *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
17841   return -1;
17842 }
17843
17844 static void
17845 handle_large_common (int small ATTRIBUTE_UNUSED)
17846 {
17847   if (flag_code != CODE_64BIT)
17848     {
17849       s_comm_internal (0, elf_common_parse);
17850       as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
17851     }
17852   else
17853     {
17854       static segT lbss_section;
17855       asection *saved_com_section_ptr = elf_com_section_ptr;
17856       asection *saved_bss_section = bss_section;
17857
17858       if (lbss_section == NULL)
17859         {
17860           flagword applicable;
17861           segT seg = now_seg;
17862           subsegT subseg = now_subseg;
17863
17864           /* The .lbss section is for local .largecomm symbols.  */
17865           lbss_section = subseg_new (".lbss", 0);
17866           applicable = bfd_applicable_section_flags (stdoutput);
17867           bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
17868           seg_info (lbss_section)->bss = 1;
17869
17870           subseg_set (seg, subseg);
17871         }
17872
17873       elf_com_section_ptr = &_bfd_elf_large_com_section;
17874       bss_section = lbss_section;
17875
17876       s_comm_internal (0, elf_common_parse);
17877
17878       elf_com_section_ptr = saved_com_section_ptr;
17879       bss_section = saved_bss_section;
17880     }
17881 }
17882 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */