gas/config/tc-i386.c

   1 /* tc-i386.c -- Assemble code for the Intel 80386
   2    Copyright (C) 1989-2022 Free Software Foundation, Inc.
   3
   4    This file is part of GAS, the GNU Assembler.
   5
   6    GAS is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GAS is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GAS; see the file COPYING.  If not, write to the Free
  18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
  19    02110-1301, USA.  */
  20
  21 /* Intel 80386 machine specific gas.
  22    Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
  23    x86_64 support by Jan Hubicka (jh@suse.cz)
  24    VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
  25    Bugs & suggestions are completely welcome.  This is free software.
  26    Please help us make it better.  */
  27
  28 #include "as.h"
  29 #include "safe-ctype.h"
  30 #include "subsegs.h"
  31 #include "dwarf2dbg.h"
  32 #include "dw2gencfi.h"
  33 #include "gen-sframe.h"
  34 #include "sframe.h"
  35 #include "elf/x86-64.h"
  36 #include "opcodes/i386-init.h"
  37 #include <limits.h>
  38
  39 #ifndef INFER_ADDR_PREFIX
  40 #define INFER_ADDR_PREFIX 1
  41 #endif
  42
  43 #ifndef DEFAULT_ARCH
  44 #define DEFAULT_ARCH "i386"
  45 #endif
  46
  47 #ifndef INLINE
  48 #if __GNUC__ >= 2
  49 #define INLINE __inline__
  50 #else
  51 #define INLINE
  52 #endif
  53 #endif
  54
  55 /* Prefixes will be emitted in the order defined below.
  56    WAIT_PREFIX must be the first prefix since FWAIT is really is an
  57    instruction, and so must come before any prefixes.
  58    The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
  59    REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
  60 #define WAIT_PREFIX     0
  61 #define SEG_PREFIX      1
  62 #define ADDR_PREFIX     2
  63 #define DATA_PREFIX     3
  64 #define REP_PREFIX      4
  65 #define HLE_PREFIX      REP_PREFIX
  66 #define BND_PREFIX      REP_PREFIX
  67 #define LOCK_PREFIX     5
  68 #define REX_PREFIX      6       /* must come last.  */
  69 #define MAX_PREFIXES    7       /* max prefixes per opcode */
  70
  71 /* we define the syntax here (modulo base,index,scale syntax) */
  72 #define REGISTER_PREFIX '%'
  73 #define IMMEDIATE_PREFIX '$'
  74 #define ABSOLUTE_PREFIX '*'
  75
  76 /* these are the instruction mnemonic suffixes in AT&T syntax or
  77    memory operand size in Intel syntax.  */
  78 #define WORD_MNEM_SUFFIX  'w'
  79 #define BYTE_MNEM_SUFFIX  'b'
  80 #define SHORT_MNEM_SUFFIX 's'
  81 #define LONG_MNEM_SUFFIX  'l'
  82 #define QWORD_MNEM_SUFFIX  'q'
  83 /* Intel Syntax.  Use a non-ascii letter since since it never appears
  84    in instructions.  */
  85 #define LONG_DOUBLE_MNEM_SUFFIX '\1'
  86
  87 #define END_OF_INSN '\0'
  88
  89 /* This matches the C -> StaticRounding alias in the opcode table.  */
  90 #define commutative staticrounding
  91
  92 /*
  93   'templates' is for grouping together 'template' structures for opcodes
  94   of the same name.  This is only used for storing the insns in the grand
  95   ole hash table of insns.
  96   The templates themselves start at START and range up to (but not including)
  97   END.
  98   */
  99 typedef struct
 100 {
 101   const insn_template *start;
 102   const insn_template *end;
 103 }
 104 templates;
 105
 106 /* 386 operand encoding bytes:  see 386 book for details of this.  */
 107 typedef struct
 108 {
 109   unsigned int regmem;  /* codes register or memory operand */
 110   unsigned int reg;     /* codes register operand (or extended opcode) */
 111   unsigned int mode;    /* how to interpret regmem & reg */
 112 }
 113 modrm_byte;
 114
 115 /* x86-64 extension prefix.  */
 116 typedef int rex_byte;
 117
 118 /* 386 opcode byte to code indirect addressing.  */
 119 typedef struct
 120 {
 121   unsigned base;
 122   unsigned index;
 123   unsigned scale;
 124 }
 125 sib_byte;
 126
 127 /* x86 arch names, types and features */
 128 typedef struct
 129 {
 130   const char *name;             /* arch name */
 131   unsigned int len:8;           /* arch string length */
 132   bool skip:1;                  /* show_arch should skip this. */
 133   enum processor_type type;     /* arch type */
 134   i386_cpu_flags enable;                /* cpu feature enable flags */
 135   i386_cpu_flags disable;       /* cpu feature disable flags */
 136 }
 137 arch_entry;
 138
 139 static void update_code_flag (int, int);
 140 static void set_code_flag (int);
 141 static void set_16bit_gcc_code_flag (int);
 142 static void set_intel_syntax (int);
 143 static void set_intel_mnemonic (int);
 144 static void set_allow_index_reg (int);
 145 static void set_check (int);
 146 static void set_cpu_arch (int);
 147 #ifdef TE_PE
 148 static void pe_directive_secrel (int);
 149 static void pe_directive_secidx (int);
 150 #endif
 151 static void signed_cons (int);
 152 static char *output_invalid (int c);
 153 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
 154                                     const char *);
 155 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
 156                                        const char *);
 157 static int i386_att_operand (char *);
 158 static int i386_intel_operand (char *, int);
 159 static int i386_intel_simplify (expressionS *);
 160 static int i386_intel_parse_name (const char *, expressionS *);
 161 static const reg_entry *parse_register (char *, char **);
 162 static char *parse_insn (char *, char *);
 163 static char *parse_operands (char *, const char *);
 164 static void swap_operands (void);
 165 static void swap_2_operands (unsigned int, unsigned int);
 166 static enum flag_code i386_addressing_mode (void);
 167 static void optimize_imm (void);
 168 static void optimize_disp (void);
 169 static const insn_template *match_template (char);
 170 static int check_string (void);
 171 static int process_suffix (void);
 172 static int check_byte_reg (void);
 173 static int check_long_reg (void);
 174 static int check_qword_reg (void);
 175 static int check_word_reg (void);
 176 static int finalize_imm (void);
 177 static int process_operands (void);
 178 static const reg_entry *build_modrm_byte (void);
 179 static void output_insn (void);
 180 static void output_imm (fragS *, offsetT);
 181 static void output_disp (fragS *, offsetT);
 182 #ifndef I386COFF
 183 static void s_bss (int);
 184 #endif
 185 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 186 static void handle_large_common (int small ATTRIBUTE_UNUSED);
 187
 188 /* GNU_PROPERTY_X86_ISA_1_USED.  */
 189 static unsigned int x86_isa_1_used;
 190 /* GNU_PROPERTY_X86_FEATURE_2_USED.  */
 191 static unsigned int x86_feature_2_used;
 192 /* Generate x86 used ISA and feature properties.  */
 193 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
 194 #endif
 195
 196 static const char *default_arch = DEFAULT_ARCH;
 197
 198 /* parse_register() returns this when a register alias cannot be used.  */
 199 static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
 200                                    { Dw2Inval, Dw2Inval } };
 201
 202 static const reg_entry *reg_eax;
 203 static const reg_entry *reg_ds;
 204 static const reg_entry *reg_es;
 205 static const reg_entry *reg_ss;
 206 static const reg_entry *reg_st0;
 207 static const reg_entry *reg_k0;
 208
 209 /* VEX prefix.  */
 210 typedef struct
 211 {
 212   /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
 213   unsigned char bytes[4];
 214   unsigned int length;
 215   /* Destination or source register specifier.  */
 216   const reg_entry *register_specifier;
 217 } vex_prefix;
 218
 219 /* 'md_assemble ()' gathers together information and puts it into a
 220    i386_insn.  */
 221
 222 union i386_op
 223   {
 224     expressionS *disps;
 225     expressionS *imms;
 226     const reg_entry *regs;
 227   };
 228
 229 enum i386_error
 230   {
 231     no_error, /* Must be first.  */
 232     operand_size_mismatch,
 233     operand_type_mismatch,
 234     register_type_mismatch,
 235     number_of_operands_mismatch,
 236     invalid_instruction_suffix,
 237     bad_imm4,
 238     unsupported_with_intel_mnemonic,
 239     unsupported_syntax,
 240     unsupported,
 241     invalid_sib_address,
 242     invalid_vsib_address,
 243     invalid_vector_register_set,
 244     invalid_tmm_register_set,
 245     invalid_dest_and_src_register_set,
 246     unsupported_vector_index_register,
 247     unsupported_broadcast,
 248     broadcast_needed,
 249     unsupported_masking,
 250     mask_not_on_destination,
 251     no_default_mask,
 252     unsupported_rc_sae,
 253     invalid_register_operand,
 254   };
 255
 256 struct _i386_insn
 257   {
 258     /* TM holds the template for the insn were currently assembling.  */
 259     insn_template tm;
 260
 261     /* SUFFIX holds the instruction size suffix for byte, word, dword
 262        or qword, if given.  */
 263     char suffix;
 264
 265     /* OPCODE_LENGTH holds the number of base opcode bytes.  */
 266     unsigned char opcode_length;
 267
 268     /* OPERANDS gives the number of given operands.  */
 269     unsigned int operands;
 270
 271     /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
 272        of given register, displacement, memory operands and immediate
 273        operands.  */
 274     unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
 275
 276     /* TYPES [i] is the type (see above #defines) which tells us how to
 277        use OP[i] for the corresponding operand.  */
 278     i386_operand_type types[MAX_OPERANDS];
 279
 280     /* Displacement expression, immediate expression, or register for each
 281        operand.  */
 282     union i386_op op[MAX_OPERANDS];
 283
 284     /* Flags for operands.  */
 285     unsigned int flags[MAX_OPERANDS];
 286 #define Operand_PCrel 1
 287 #define Operand_Mem   2
 288
 289     /* Relocation type for operand */
 290     enum bfd_reloc_code_real reloc[MAX_OPERANDS];
 291
 292     /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
 293        the base index byte below.  */
 294     const reg_entry *base_reg;
 295     const reg_entry *index_reg;
 296     unsigned int log2_scale_factor;
 297
 298     /* SEG gives the seg_entries of this insn.  They are zero unless
 299        explicit segment overrides are given.  */
 300     const reg_entry *seg[2];
 301
 302     /* Copied first memory operand string, for re-checking.  */
 303     char *memop1_string;
 304
 305     /* PREFIX holds all the given prefix opcodes (usually null).
 306        PREFIXES is the number of prefix opcodes.  */
 307     unsigned int prefixes;
 308     unsigned char prefix[MAX_PREFIXES];
 309
 310     /* Register is in low 3 bits of opcode.  */
 311     bool short_form;
 312
 313     /* The operand to a branch insn indicates an absolute branch.  */
 314     bool jumpabsolute;
 315
 316     /* There is a memory operand of (%dx) which should be only used
 317        with input/output instructions.  */
 318     bool input_output_operand;
 319
 320     /* Extended states.  */
 321     enum
 322       {
 323         /* Use MMX state.  */
 324         xstate_mmx = 1 << 0,
 325         /* Use XMM state.  */
 326         xstate_xmm = 1 << 1,
 327         /* Use YMM state.  */
 328         xstate_ymm = 1 << 2 | xstate_xmm,
 329         /* Use ZMM state.  */
 330         xstate_zmm = 1 << 3 | xstate_ymm,
 331         /* Use TMM state.  */
 332         xstate_tmm = 1 << 4,
 333         /* Use MASK state.  */
 334         xstate_mask = 1 << 5
 335       } xstate;
 336
 337     /* Has GOTPC or TLS relocation.  */
 338     bool has_gotpc_tls_reloc;
 339
 340     /* RM and SIB are the modrm byte and the sib byte where the
 341        addressing modes of this insn are encoded.  */
 342     modrm_byte rm;
 343     rex_byte rex;
 344     rex_byte vrex;
 345     sib_byte sib;
 346     vex_prefix vex;
 347
 348     /* Masking attributes.
 349
 350        The struct describes masking, applied to OPERAND in the instruction.
 351        REG is a pointer to the corresponding mask register.  ZEROING tells
 352        whether merging or zeroing mask is used.  */
 353     struct Mask_Operation
 354     {
 355       const reg_entry *reg;
 356       unsigned int zeroing;
 357       /* The operand where this operation is associated.  */
 358       unsigned int operand;
 359     } mask;
 360
 361     /* Rounding control and SAE attributes.  */
 362     struct RC_Operation
 363     {
 364       enum rc_type
 365         {
 366           rc_none = -1,
 367           rne,
 368           rd,
 369           ru,
 370           rz,
 371           saeonly
 372         } type;
 373       /* In Intel syntax the operand modifier form is supposed to be used, but
 374          we continue to accept the immediate forms as well.  */
 375       bool modifier;
 376     } rounding;
 377
 378     /* Broadcasting attributes.
 379
 380        The struct describes broadcasting, applied to OPERAND.  TYPE is
 381        expresses the broadcast factor.  */
 382     struct Broadcast_Operation
 383     {
 384       /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}.  */
 385       unsigned int type;
 386
 387       /* Index of broadcasted operand.  */
 388       unsigned int operand;
 389
 390       /* Number of bytes to broadcast.  */
 391       unsigned int bytes;
 392     } broadcast;
 393
 394     /* Compressed disp8*N attribute.  */
 395     unsigned int memshift;
 396
 397     /* Prefer load or store in encoding.  */
 398     enum
 399       {
 400         dir_encoding_default = 0,
 401         dir_encoding_load,
 402         dir_encoding_store,
 403         dir_encoding_swap
 404       } dir_encoding;
 405
 406     /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
 407     enum
 408       {
 409         disp_encoding_default = 0,
 410         disp_encoding_8bit,
 411         disp_encoding_16bit,
 412         disp_encoding_32bit
 413       } disp_encoding;
 414
 415     /* Prefer the REX byte in encoding.  */
 416     bool rex_encoding;
 417
 418     /* Disable instruction size optimization.  */
 419     bool no_optimize;
 420
 421     /* How to encode vector instructions.  */
 422     enum
 423       {
 424         vex_encoding_default = 0,
 425         vex_encoding_vex,
 426         vex_encoding_vex3,
 427         vex_encoding_evex,
 428         vex_encoding_error
 429       } vec_encoding;
 430
 431     /* REP prefix.  */
 432     const char *rep_prefix;
 433
 434     /* HLE prefix.  */
 435     const char *hle_prefix;
 436
 437     /* Have BND prefix.  */
 438     const char *bnd_prefix;
 439
 440     /* Have NOTRACK prefix.  */
 441     const char *notrack_prefix;
 442
 443     /* Error message.  */
 444     enum i386_error error;
 445   };
 446
 447 typedef struct _i386_insn i386_insn;
 448
 449 /* Link RC type with corresponding string, that'll be looked for in
 450    asm.  */
 451 struct RC_name
 452 {
 453   enum rc_type type;
 454   const char *name;
 455   unsigned int len;
 456 };
 457
 458 static const struct RC_name RC_NamesTable[] =
 459 {
 460   {  rne, STRING_COMMA_LEN ("rn-sae") },
 461   {  rd,  STRING_COMMA_LEN ("rd-sae") },
 462   {  ru,  STRING_COMMA_LEN ("ru-sae") },
 463   {  rz,  STRING_COMMA_LEN ("rz-sae") },
 464   {  saeonly,  STRING_COMMA_LEN ("sae") },
 465 };
 466
 467 /* To be indexed by segment register number.  */
 468 static const unsigned char i386_seg_prefixes[] = {
 469   ES_PREFIX_OPCODE,
 470   CS_PREFIX_OPCODE,
 471   SS_PREFIX_OPCODE,
 472   DS_PREFIX_OPCODE,
 473   FS_PREFIX_OPCODE,
 474   GS_PREFIX_OPCODE
 475 };
 476
 477 /* List of chars besides those in app.c:symbol_chars that can start an
 478    operand.  Used to prevent the scrubber eating vital white-space.  */
 479 const char extra_symbol_chars[] = "*%-([{}"
 480 #ifdef LEX_AT
 481         "@"
 482 #endif
 483 #ifdef LEX_QM
 484         "?"
 485 #endif
 486         ;
 487
 488 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF))     \
 489      && !defined (TE_GNU)                               \
 490      && !defined (TE_LINUX)                             \
 491      && !defined (TE_Haiku)                             \
 492      && !defined (TE_FreeBSD)                           \
 493      && !defined (TE_DragonFly)                         \
 494      && !defined (TE_NetBSD))
 495 /* This array holds the chars that always start a comment.  If the
 496    pre-processor is disabled, these aren't very useful.  The option
 497    --divide will remove '/' from this list.  */
 498 const char *i386_comment_chars = "#/";
 499 #define SVR4_COMMENT_CHARS 1
 500 #define PREFIX_SEPARATOR '\\'
 501
 502 #else
 503 const char *i386_comment_chars = "#";
 504 #define PREFIX_SEPARATOR '/'
 505 #endif
 506
 507 /* This array holds the chars that only start a comment at the beginning of
 508    a line.  If the line seems to have the form '# 123 filename'
 509    .line and .file directives will appear in the pre-processed output.
 510    Note that input_file.c hand checks for '#' at the beginning of the
 511    first line of the input file.  This is because the compiler outputs
 512    #NO_APP at the beginning of its output.
 513    Also note that comments started like this one will always work if
 514    '/' isn't otherwise defined.  */
 515 const char line_comment_chars[] = "#/";
 516
 517 const char line_separator_chars[] = ";";
 518
 519 /* Chars that can be used to separate mant from exp in floating point
 520    nums.  */
 521 const char EXP_CHARS[] = "eE";
 522
 523 /* Chars that mean this number is a floating point constant
 524    As in 0f12.456
 525    or    0d1.2345e12.  */
 526 const char FLT_CHARS[] = "fFdDxXhHbB";
 527
 528 /* Tables for lexical analysis.  */
 529 static char mnemonic_chars[256];
 530 static char register_chars[256];
 531 static char operand_chars[256];
 532 static char identifier_chars[256];
 533
 534 /* Lexical macros.  */
 535 #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
 536 #define is_operand_char(x) (operand_chars[(unsigned char) x])
 537 #define is_register_char(x) (register_chars[(unsigned char) x])
 538 #define is_space_char(x) ((x) == ' ')
 539 #define is_identifier_char(x) (identifier_chars[(unsigned char) x])
 540
 541 /* All non-digit non-letter characters that may occur in an operand.  */
 542 static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
 543
 544 /* md_assemble() always leaves the strings it's passed unaltered.  To
 545    effect this we maintain a stack of saved characters that we've smashed
 546    with '\0's (indicating end of strings for various sub-fields of the
 547    assembler instruction).  */
 548 static char save_stack[32];
 549 static char *save_stack_p;
 550 #define END_STRING_AND_SAVE(s) \
 551         do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
 552 #define RESTORE_END_STRING(s) \
 553         do { *(s) = *--save_stack_p; } while (0)
 554
 555 /* The instruction we're assembling.  */
 556 static i386_insn i;
 557
 558 /* Possible templates for current insn.  */
 559 static const templates *current_templates;
 560
 561 /* Per instruction expressionS buffers: max displacements & immediates.  */
 562 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
 563 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
 564
 565 /* Current operand we are working on.  */
 566 static int this_operand = -1;
 567
 568 /* We support four different modes.  FLAG_CODE variable is used to distinguish
 569    these.  */
 570
 571 enum flag_code {
 572         CODE_32BIT,
 573         CODE_16BIT,
 574         CODE_64BIT };
 575
 576 static enum flag_code flag_code;
 577 static unsigned int object_64bit;
 578 static unsigned int disallow_64bit_reloc;
 579 static int use_rela_relocations = 0;
 580 /* __tls_get_addr/___tls_get_addr symbol for TLS.  */
 581 static const char *tls_get_addr;
 582
 583 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
 584      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
 585      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
 586
 587 /* The ELF ABI to use.  */
 588 enum x86_elf_abi
 589 {
 590   I386_ABI,
 591   X86_64_ABI,
 592   X86_64_X32_ABI
 593 };
 594
 595 static enum x86_elf_abi x86_elf_abi = I386_ABI;
 596 #endif
 597
 598 #if defined (TE_PE) || defined (TE_PEP)
 599 /* Use big object file format.  */
 600 static int use_big_obj = 0;
 601 #endif
 602
 603 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 604 /* 1 if generating code for a shared library.  */
 605 static int shared = 0;
 606
 607 unsigned int x86_sframe_cfa_sp_reg;
 608 /* The other CFA base register for SFrame unwind info.  */
 609 unsigned int x86_sframe_cfa_fp_reg;
 610 unsigned int x86_sframe_cfa_ra_reg;
 611
 612 #endif
 613
 614 /* 1 for intel syntax,
 615    0 if att syntax.  */
 616 static int intel_syntax = 0;
 617
 618 static enum x86_64_isa
 619 {
 620   amd64 = 1,    /* AMD64 ISA.  */
 621   intel64       /* Intel64 ISA.  */
 622 } isa64;
 623
 624 /* 1 for intel mnemonic,
 625    0 if att mnemonic.  */
 626 static int intel_mnemonic = !SYSV386_COMPAT;
 627
 628 /* 1 if pseudo registers are permitted.  */
 629 static int allow_pseudo_reg = 0;
 630
 631 /* 1 if register prefix % not required.  */
 632 static int allow_naked_reg = 0;
 633
 634 /* 1 if the assembler should add BND prefix for all control-transferring
 635    instructions supporting it, even if this prefix wasn't specified
 636    explicitly.  */
 637 static int add_bnd_prefix = 0;
 638
 639 /* 1 if pseudo index register, eiz/riz, is allowed .  */
 640 static int allow_index_reg = 0;
 641
 642 /* 1 if the assembler should ignore LOCK prefix, even if it was
 643    specified explicitly.  */
 644 static int omit_lock_prefix = 0;
 645
 646 /* 1 if the assembler should encode lfence, mfence, and sfence as
 647    "lock addl $0, (%{re}sp)".  */
 648 static int avoid_fence = 0;
 649
 650 /* 1 if lfence should be inserted after every load.  */
 651 static int lfence_after_load = 0;
 652
 653 /* Non-zero if lfence should be inserted before indirect branch.  */
 654 static enum lfence_before_indirect_branch_kind
 655   {
 656     lfence_branch_none = 0,
 657     lfence_branch_register,
 658     lfence_branch_memory,
 659     lfence_branch_all
 660   }
 661 lfence_before_indirect_branch;
 662
 663 /* Non-zero if lfence should be inserted before ret.  */
 664 static enum lfence_before_ret_kind
 665   {
 666     lfence_before_ret_none = 0,
 667     lfence_before_ret_not,
 668     lfence_before_ret_or,
 669     lfence_before_ret_shl
 670   }
 671 lfence_before_ret;
 672
 673 /* Types of previous instruction is .byte or prefix.  */
 674 static struct
 675   {
 676     segT seg;
 677     const char *file;
 678     const char *name;
 679     unsigned int line;
 680     enum last_insn_kind
 681       {
 682         last_insn_other = 0,
 683         last_insn_directive,
 684         last_insn_prefix
 685       } kind;
 686   } last_insn;
 687
 688 /* 1 if the assembler should generate relax relocations.  */
 689
 690 static int generate_relax_relocations
 691   = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
 692
 693 static enum check_kind
 694   {
 695     check_none = 0,
 696     check_warning,
 697     check_error
 698   }
 699 sse_check, operand_check = check_warning;
 700
 701 /* Non-zero if branches should be aligned within power of 2 boundary.  */
 702 static int align_branch_power = 0;
 703
 704 /* Types of branches to align.  */
 705 enum align_branch_kind
 706   {
 707     align_branch_none = 0,
 708     align_branch_jcc = 1,
 709     align_branch_fused = 2,
 710     align_branch_jmp = 3,
 711     align_branch_call = 4,
 712     align_branch_indirect = 5,
 713     align_branch_ret = 6
 714   };
 715
 716 /* Type bits of branches to align.  */
 717 enum align_branch_bit
 718   {
 719     align_branch_jcc_bit = 1 << align_branch_jcc,
 720     align_branch_fused_bit = 1 << align_branch_fused,
 721     align_branch_jmp_bit = 1 << align_branch_jmp,
 722     align_branch_call_bit = 1 << align_branch_call,
 723     align_branch_indirect_bit = 1 << align_branch_indirect,
 724     align_branch_ret_bit = 1 << align_branch_ret
 725   };
 726
 727 static unsigned int align_branch = (align_branch_jcc_bit
 728                                     | align_branch_fused_bit
 729                                     | align_branch_jmp_bit);
 730
 731 /* Types of condition jump used by macro-fusion.  */
 732 enum mf_jcc_kind
 733   {
 734     mf_jcc_jo = 0,  /* base opcode 0x70  */
 735     mf_jcc_jc,      /* base opcode 0x72  */
 736     mf_jcc_je,      /* base opcode 0x74  */
 737     mf_jcc_jna,     /* base opcode 0x76  */
 738     mf_jcc_js,      /* base opcode 0x78  */
 739     mf_jcc_jp,      /* base opcode 0x7a  */
 740     mf_jcc_jl,      /* base opcode 0x7c  */
 741     mf_jcc_jle,     /* base opcode 0x7e  */
 742   };
 743
 744 /* Types of compare flag-modifying insntructions used by macro-fusion.  */
 745 enum mf_cmp_kind
 746   {
 747     mf_cmp_test_and,  /* test/cmp */
 748     mf_cmp_alu_cmp,  /* add/sub/cmp */
 749     mf_cmp_incdec  /* inc/dec */
 750   };
 751
 752 /* The maximum padding size for fused jcc.  CMP like instruction can
 753    be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
 754    prefixes.   */
 755 #define MAX_FUSED_JCC_PADDING_SIZE 20
 756
 757 /* The maximum number of prefixes added for an instruction.  */
 758 static unsigned int align_branch_prefix_size = 5;
 759
 760 /* Optimization:
 761    1. Clear the REX_W bit with register operand if possible.
 762    2. Above plus use 128bit vector instruction to clear the full vector
 763       register.
 764  */
 765 static int optimize = 0;
 766
 767 /* Optimization:
 768    1. Clear the REX_W bit with register operand if possible.
 769    2. Above plus use 128bit vector instruction to clear the full vector
 770       register.
 771    3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
 772       "testb $imm7,%r8".
 773  */
 774 static int optimize_for_space = 0;
 775
 776 /* Register prefix used for error message.  */
 777 static const char *register_prefix = "%";
 778
 779 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
 780    leave, push, and pop instructions so that gcc has the same stack
 781    frame as in 32 bit mode.  */
 782 static char stackop_size = '\0';
 783
 784 /* Non-zero to optimize code alignment.  */
 785 int optimize_align_code = 1;
 786
 787 /* Non-zero to quieten some warnings.  */
 788 static int quiet_warnings = 0;
 789
 790 /* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs.  */
 791 static bool pre_386_16bit_warned;
 792
 793 /* CPU name.  */
 794 static const char *cpu_arch_name = NULL;
 795 static char *cpu_sub_arch_name = NULL;
 796
 797 /* CPU feature flags.  */
 798 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
 799
 800 /* If we have selected a cpu we are generating instructions for.  */
 801 static int cpu_arch_tune_set = 0;
 802
 803 /* Cpu we are generating instructions for.  */
 804 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
 805
 806 /* CPU feature flags of cpu we are generating instructions for.  */
 807 static i386_cpu_flags cpu_arch_tune_flags;
 808
 809 /* CPU instruction set architecture used.  */
 810 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
 811
 812 /* CPU feature flags of instruction set architecture used.  */
 813 i386_cpu_flags cpu_arch_isa_flags;
 814
 815 /* If set, conditional jumps are not automatically promoted to handle
 816    larger than a byte offset.  */
 817 static bool no_cond_jump_promotion = false;
 818
 819 /* Encode SSE instructions with VEX prefix.  */
 820 static unsigned int sse2avx;
 821
 822 /* Encode aligned vector move as unaligned vector move.  */
 823 static unsigned int use_unaligned_vector_move;
 824
 825 /* Encode scalar AVX instructions with specific vector length.  */
 826 static enum
 827   {
 828     vex128 = 0,
 829     vex256
 830   } avxscalar;
 831
 832 /* Encode VEX WIG instructions with specific vex.w.  */
 833 static enum
 834   {
 835     vexw0 = 0,
 836     vexw1
 837   } vexwig;
 838
 839 /* Encode scalar EVEX LIG instructions with specific vector length.  */
 840 static enum
 841   {
 842     evexl128 = 0,
 843     evexl256,
 844     evexl512
 845   } evexlig;
 846
 847 /* Encode EVEX WIG instructions with specific evex.w.  */
 848 static enum
 849   {
 850     evexw0 = 0,
 851     evexw1
 852   } evexwig;
 853
 854 /* Value to encode in EVEX RC bits, for SAE-only instructions.  */
 855 static enum rc_type evexrcig = rne;
 856
 857 /* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
 858 static symbolS *GOT_symbol;
 859
 860 /* The dwarf2 return column, adjusted for 32 or 64 bit.  */
 861 unsigned int x86_dwarf2_return_column;
 862
 863 /* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
 864 int x86_cie_data_alignment;
 865
 866 /* Interface to relax_segment.
 867    There are 3 major relax states for 386 jump insns because the
 868    different types of jumps add different sizes to frags when we're
 869    figuring out what sort of jump to choose to reach a given label.
 870
 871    BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
 872    branches which are handled by md_estimate_size_before_relax() and
 873    i386_generic_table_relax_frag().  */
 874
 875 /* Types.  */
 876 #define UNCOND_JUMP 0
 877 #define COND_JUMP 1
 878 #define COND_JUMP86 2
 879 #define BRANCH_PADDING 3
 880 #define BRANCH_PREFIX 4
 881 #define FUSED_JCC_PADDING 5
 882
 883 /* Sizes.  */
 884 #define CODE16  1
 885 #define SMALL   0
 886 #define SMALL16 (SMALL | CODE16)
 887 #define BIG     2
 888 #define BIG16   (BIG | CODE16)
 889
 890 #ifndef INLINE
 891 #ifdef __GNUC__
 892 #define INLINE __inline__
 893 #else
 894 #define INLINE
 895 #endif
 896 #endif
 897
 898 #define ENCODE_RELAX_STATE(type, size) \
 899   ((relax_substateT) (((type) << 2) | (size)))
 900 #define TYPE_FROM_RELAX_STATE(s) \
 901   ((s) >> 2)
 902 #define DISP_SIZE_FROM_RELAX_STATE(s) \
 903     ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
 904
 905 /* This table is used by relax_frag to promote short jumps to long
 906    ones where necessary.  SMALL (short) jumps may be promoted to BIG
 907    (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
 908    don't allow a short jump in a 32 bit code segment to be promoted to
 909    a 16 bit offset jump because it's slower (requires data size
 910    prefix), and doesn't work, unless the destination is in the bottom
 911    64k of the code segment (The top 16 bits of eip are zeroed).  */
 912
 913 const relax_typeS md_relax_table[] =
 914 {
 915   /* The fields are:
 916      1) most positive reach of this state,
 917      2) most negative reach of this state,
 918      3) how many bytes this mode will have in the variable part of the frag
 919      4) which index into the table to try if we can't fit into this one.  */
 920
 921   /* UNCOND_JUMP states.  */
 922   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
 923   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
 924   /* dword jmp adds 4 bytes to frag:
 925      0 extra opcode bytes, 4 displacement bytes.  */
 926   {0, 0, 4, 0},
 927   /* word jmp adds 2 byte2 to frag:
 928      0 extra opcode bytes, 2 displacement bytes.  */
 929   {0, 0, 2, 0},
 930
 931   /* COND_JUMP states.  */
 932   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
 933   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
 934   /* dword conditionals adds 5 bytes to frag:
 935      1 extra opcode byte, 4 displacement bytes.  */
 936   {0, 0, 5, 0},
 937   /* word conditionals add 3 bytes to frag:
 938      1 extra opcode byte, 2 displacement bytes.  */
 939   {0, 0, 3, 0},
 940
 941   /* COND_JUMP86 states.  */
 942   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
 943   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
 944   /* dword conditionals adds 5 bytes to frag:
 945      1 extra opcode byte, 4 displacement bytes.  */
 946   {0, 0, 5, 0},
 947   /* word conditionals add 4 bytes to frag:
 948      1 displacement byte and a 3 byte long branch insn.  */
 949   {0, 0, 4, 0}
 950 };
 951
 952 #define ARCH(n, t, f, s) \
 953   { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, CPU_ ## f ## _FLAGS, \
 954     CPU_NONE_FLAGS }
 955 #define SUBARCH(n, e, d, s) \
 956   { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, CPU_ ## e ## _FLAGS, \
 957     CPU_ ## d ## _FLAGS }
 958
 959 static const arch_entry cpu_arch[] =
 960 {
 961   /* Do not replace the first two entries - i386_target_format() and
 962      set_cpu_arch() rely on them being there in this order.  */
 963   ARCH (generic32, GENERIC32, GENERIC32, false),
 964   ARCH (generic64, GENERIC64, GENERIC64, false),
 965   ARCH (i8086, UNKNOWN, NONE, false),
 966   ARCH (i186, UNKNOWN, I186, false),
 967   ARCH (i286, UNKNOWN, I286, false),
 968   ARCH (i386, I386, I386, false),
 969   ARCH (i486, I486, I486, false),
 970   ARCH (i586, PENTIUM, I586, false),
 971   ARCH (i686, PENTIUMPRO, I686, false),
 972   ARCH (pentium, PENTIUM, I586, false),
 973   ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
 974   ARCH (pentiumii, PENTIUMPRO, P2, false),
 975   ARCH (pentiumiii, PENTIUMPRO, P3, false),
 976   ARCH (pentium4, PENTIUM4, P4, false),
 977   ARCH (prescott, NOCONA, CORE, false),
 978   ARCH (nocona, NOCONA, NOCONA, false),
 979   ARCH (yonah, CORE, CORE, true),
 980   ARCH (core, CORE, CORE, false),
 981   ARCH (merom, CORE2, CORE2, true),
 982   ARCH (core2, CORE2, CORE2, false),
 983   ARCH (corei7, COREI7, COREI7, false),
 984   ARCH (iamcu, IAMCU, IAMCU, false),
 985   ARCH (k6, K6, K6, false),
 986   ARCH (k6_2, K6, K6_2, false),
 987   ARCH (athlon, ATHLON, ATHLON, false),
 988   ARCH (sledgehammer, K8, K8, true),
 989   ARCH (opteron, K8, K8, false),
 990   ARCH (k8, K8, K8, false),
 991   ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
 992   ARCH (bdver1, BD, BDVER1, false),
 993   ARCH (bdver2, BD, BDVER2, false),
 994   ARCH (bdver3, BD, BDVER3, false),
 995   ARCH (bdver4, BD, BDVER4, false),
 996   ARCH (znver1, ZNVER, ZNVER1, false),
 997   ARCH (znver2, ZNVER, ZNVER2, false),
 998   ARCH (znver3, ZNVER, ZNVER3, false),
 999   ARCH (znver4, ZNVER, ZNVER4, false),
1000   ARCH (btver1, BT, BTVER1, false),
1001   ARCH (btver2, BT, BTVER2, false),
1002
1003   SUBARCH (8087, 8087, ANY_X87, false),
1004   SUBARCH (87, NONE, ANY_X87, false), /* Disable only!  */
1005   SUBARCH (287, 287, ANY_287, false),
1006   SUBARCH (387, 387, ANY_387, false),
1007   SUBARCH (687, 687, ANY_687, false),
1008   SUBARCH (cmov, CMOV, ANY_CMOV, false),
1009   SUBARCH (fxsr, FXSR, ANY_FXSR, false),
1010   SUBARCH (mmx, MMX, ANY_MMX, false),
1011   SUBARCH (sse, SSE, ANY_SSE, false),
1012   SUBARCH (sse2, SSE2, ANY_SSE2, false),
1013   SUBARCH (sse3, SSE3, ANY_SSE3, false),
1014   SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
1015   SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
1016   SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
1017   SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
1018   SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
1019   SUBARCH (avx, AVX, ANY_AVX, false),
1020   SUBARCH (avx2, AVX2, ANY_AVX2, false),
1021   SUBARCH (avx512f, AVX512F, ANY_AVX512F, false),
1022   SUBARCH (avx512cd, AVX512CD, ANY_AVX512CD, false),
1023   SUBARCH (avx512er, AVX512ER, ANY_AVX512ER, false),
1024   SUBARCH (avx512pf, AVX512PF, ANY_AVX512PF, false),
1025   SUBARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, false),
1026   SUBARCH (avx512bw, AVX512BW, ANY_AVX512BW, false),
1027   SUBARCH (avx512vl, AVX512VL, ANY_AVX512VL, false),
1028   SUBARCH (vmx, VMX, VMX, false),
1029   SUBARCH (vmfunc, VMFUNC, VMFUNC, false),
1030   SUBARCH (smx, SMX, SMX, false),
1031   SUBARCH (xsave, XSAVE, XSAVE, false),
1032   SUBARCH (xsaveopt, XSAVEOPT, XSAVEOPT, false),
1033   SUBARCH (xsavec, XSAVEC, XSAVEC, false),
1034   SUBARCH (xsaves, XSAVES, XSAVES, false),
1035   SUBARCH (aes, AES, AES, false),
1036   SUBARCH (pclmul, PCLMUL, PCLMUL, false),
1037   SUBARCH (clmul, PCLMUL, PCLMUL, true),
1038   SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
1039   SUBARCH (rdrnd, RDRND, RDRND, false),
1040   SUBARCH (f16c, F16C, F16C, false),
1041   SUBARCH (bmi2, BMI2, BMI2, false),
1042   SUBARCH (fma, FMA, FMA, false),
1043   SUBARCH (fma4, FMA4, FMA4, false),
1044   SUBARCH (xop, XOP, XOP, false),
1045   SUBARCH (lwp, LWP, LWP, false),
1046   SUBARCH (movbe, MOVBE, MOVBE, false),
1047   SUBARCH (cx16, CX16, CX16, false),
1048   SUBARCH (ept, EPT, EPT, false),
1049   SUBARCH (lzcnt, LZCNT, LZCNT, false),
1050   SUBARCH (popcnt, POPCNT, POPCNT, false),
1051   SUBARCH (hle, HLE, HLE, false),
1052   SUBARCH (rtm, RTM, RTM, false),
1053   SUBARCH (invpcid, INVPCID, INVPCID, false),
1054   SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
1055   SUBARCH (nop, NOP, NOP, false),
1056   SUBARCH (syscall, SYSCALL, SYSCALL, false),
1057   SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
1058   SUBARCH (3dnow, 3DNOW, 3DNOW, false),
1059   SUBARCH (3dnowa, 3DNOWA, 3DNOWA, false),
1060   SUBARCH (padlock, PADLOCK, PADLOCK, false),
1061   SUBARCH (pacifica, SVME, SVME, true),
1062   SUBARCH (svme, SVME, SVME, false),
1063   SUBARCH (abm, ABM, ABM, false),
1064   SUBARCH (bmi, BMI, BMI, false),
1065   SUBARCH (tbm, TBM, TBM, false),
1066   SUBARCH (adx, ADX, ADX, false),
1067   SUBARCH (rdseed, RDSEED, RDSEED, false),
1068   SUBARCH (prfchw, PRFCHW, PRFCHW, false),
1069   SUBARCH (smap, SMAP, SMAP, false),
1070   SUBARCH (mpx, MPX, MPX, false),
1071   SUBARCH (sha, SHA, SHA, false),
1072   SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
1073   SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
1074   SUBARCH (se1, SE1, SE1, false),
1075   SUBARCH (clwb, CLWB, CLWB, false),
1076   SUBARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, false),
1077   SUBARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, false),
1078   SUBARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, false),
1079   SUBARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, false),
1080   SUBARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, false),
1081   SUBARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, false),
1082   SUBARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, false),
1083   SUBARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, false),
1084   SUBARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, false),
1085   SUBARCH (clzero, CLZERO, CLZERO, false),
1086   SUBARCH (mwaitx, MWAITX, MWAITX, false),
1087   SUBARCH (ospke, OSPKE, OSPKE, false),
1088   SUBARCH (rdpid, RDPID, RDPID, false),
1089   SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
1090   SUBARCH (ibt, IBT, ANY_IBT, false),
1091   SUBARCH (shstk, SHSTK, ANY_SHSTK, false),
1092   SUBARCH (gfni, GFNI, GFNI, false),
1093   SUBARCH (vaes, VAES, VAES, false),
1094   SUBARCH (vpclmulqdq, VPCLMULQDQ, VPCLMULQDQ, false),
1095   SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
1096   SUBARCH (pconfig, PCONFIG, PCONFIG, false),
1097   SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
1098   SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
1099   SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
1100   SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
1101   SUBARCH (amx_fp16, AMX_FP16, AMX_FP16, false),
1102   SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
1103   SUBARCH (movdiri, MOVDIRI, ANY_MOVDIRI, false),
1104   SUBARCH (movdir64b, MOVDIR64B, ANY_MOVDIR64B, false),
1105   SUBARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, false),
1106   SUBARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
1107            ANY_AVX512_VP2INTERSECT, false),
1108   SUBARCH (tdx, TDX, ANY_TDX, false),
1109   SUBARCH (enqcmd, ENQCMD, ANY_ENQCMD, false),
1110   SUBARCH (serialize, SERIALIZE, ANY_SERIALIZE, false),
1111   SUBARCH (rdpru, RDPRU, RDPRU, false),
1112   SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
1113   SUBARCH (sev_es, SEV_ES, SEV_ES, false),
1114   SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
1115   SUBARCH (kl, KL, ANY_KL, false),
1116   SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
1117   SUBARCH (uintr, UINTR, ANY_UINTR, false),
1118   SUBARCH (hreset, HRESET, ANY_HRESET, false),
1119   SUBARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, false),
1120   SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
1121   SUBARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, false),
1122   SUBARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, false),
1123   SUBARCH (cmpccxadd, CMPCCXADD, ANY_CMPCCXADD, false),
1124   SUBARCH (wrmsrns, WRMSRNS, ANY_WRMSRNS, false),
1125   SUBARCH (msrlist, MSRLIST, ANY_MSRLIST, false),
1126   SUBARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, false),
1127   SUBARCH (rao_int, RAO_INT, ANY_RAO_INT, false),
1128   SUBARCH (rmpquery, RMPQUERY, RMPQUERY, false),
1129 };
1130
1131 #undef SUBARCH
1132 #undef ARCH
1133
1134 #ifdef I386COFF
1135 /* Like s_lcomm_internal in gas/read.c but the alignment string
1136    is allowed to be optional.  */
1137
1138 static symbolS *
1139 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1140 {
1141   addressT align = 0;
1142
1143   SKIP_WHITESPACE ();
1144
1145   if (needs_align
1146       && *input_line_pointer == ',')
1147     {
1148       align = parse_align (needs_align - 1);
1149
1150       if (align == (addressT) -1)
1151         return NULL;
1152     }
1153   else
1154     {
1155       if (size >= 8)
1156         align = 3;
1157       else if (size >= 4)
1158         align = 2;
1159       else if (size >= 2)
1160         align = 1;
1161       else
1162         align = 0;
1163     }
1164
1165   bss_alloc (symbolP, size, align);
1166   return symbolP;
1167 }
1168
1169 static void
1170 pe_lcomm (int needs_align)
1171 {
1172   s_comm_internal (needs_align * 2, pe_lcomm_internal);
1173 }
1174 #endif
1175
1176 const pseudo_typeS md_pseudo_table[] =
1177 {
1178 #if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
1179   {"align", s_align_bytes, 0},
1180 #else
1181   {"align", s_align_ptwo, 0},
1182 #endif
1183   {"arch", set_cpu_arch, 0},
1184 #ifndef I386COFF
1185   {"bss", s_bss, 0},
1186 #else
1187   {"lcomm", pe_lcomm, 1},
1188 #endif
1189   {"ffloat", float_cons, 'f'},
1190   {"dfloat", float_cons, 'd'},
1191   {"tfloat", float_cons, 'x'},
1192   {"hfloat", float_cons, 'h'},
1193   {"bfloat16", float_cons, 'b'},
1194   {"value", cons, 2},
1195   {"slong", signed_cons, 4},
1196   {"noopt", s_ignore, 0},
1197   {"optim", s_ignore, 0},
1198   {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
1199   {"code16", set_code_flag, CODE_16BIT},
1200   {"code32", set_code_flag, CODE_32BIT},
1201 #ifdef BFD64
1202   {"code64", set_code_flag, CODE_64BIT},
1203 #endif
1204   {"intel_syntax", set_intel_syntax, 1},
1205   {"att_syntax", set_intel_syntax, 0},
1206   {"intel_mnemonic", set_intel_mnemonic, 1},
1207   {"att_mnemonic", set_intel_mnemonic, 0},
1208   {"allow_index_reg", set_allow_index_reg, 1},
1209   {"disallow_index_reg", set_allow_index_reg, 0},
1210   {"sse_check", set_check, 0},
1211   {"operand_check", set_check, 1},
1212 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
1213   {"largecomm", handle_large_common, 0},
1214 #else
1215   {"file", dwarf2_directive_file, 0},
1216   {"loc", dwarf2_directive_loc, 0},
1217   {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
1218 #endif
1219 #ifdef TE_PE
1220   {"secrel32", pe_directive_secrel, 0},
1221   {"secidx", pe_directive_secidx, 0},
1222 #endif
1223   {0, 0, 0}
1224 };
1225
1226 /* For interface with expression ().  */
1227 extern char *input_line_pointer;
1228
1229 /* Hash table for instruction mnemonic lookup.  */
1230 static htab_t op_hash;
1231
1232 /* Hash table for register lookup.  */
1233 static htab_t reg_hash;
1234 \f
1235   /* Various efficient no-op patterns for aligning code labels.
1236      Note: Don't try to assemble the instructions in the comments.
1237      0L and 0w are not legal.  */
1238 static const unsigned char f32_1[] =
1239   {0x90};                               /* nop                  */
1240 static const unsigned char f32_2[] =
1241   {0x66,0x90};                          /* xchg %ax,%ax         */
1242 static const unsigned char f32_3[] =
1243   {0x8d,0x76,0x00};                     /* leal 0(%esi),%esi    */
1244 static const unsigned char f32_4[] =
1245   {0x8d,0x74,0x26,0x00};                /* leal 0(%esi,1),%esi  */
1246 static const unsigned char f32_6[] =
1247   {0x8d,0xb6,0x00,0x00,0x00,0x00};      /* leal 0L(%esi),%esi   */
1248 static const unsigned char f32_7[] =
1249   {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
1250 static const unsigned char f16_3[] =
1251   {0x8d,0x74,0x00};                     /* lea 0(%si),%si       */
1252 static const unsigned char f16_4[] =
1253   {0x8d,0xb4,0x00,0x00};                /* lea 0W(%si),%si      */
1254 static const unsigned char jump_disp8[] =
1255   {0xeb};                               /* jmp disp8           */
1256 static const unsigned char jump32_disp32[] =
1257   {0xe9};                               /* jmp disp32          */
1258 static const unsigned char jump16_disp32[] =
1259   {0x66,0xe9};                          /* jmp disp32          */
1260 /* 32-bit NOPs patterns.  */
1261 static const unsigned char *const f32_patt[] = {
1262   f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
1263 };
1264 /* 16-bit NOPs patterns.  */
1265 static const unsigned char *const f16_patt[] = {
1266   f32_1, f32_2, f16_3, f16_4
1267 };
1268 /* nopl (%[re]ax) */
1269 static const unsigned char alt_3[] =
1270   {0x0f,0x1f,0x00};
1271 /* nopl 0(%[re]ax) */
1272 static const unsigned char alt_4[] =
1273   {0x0f,0x1f,0x40,0x00};
1274 /* nopl 0(%[re]ax,%[re]ax,1) */
1275 static const unsigned char alt_5[] =
1276   {0x0f,0x1f,0x44,0x00,0x00};
1277 /* nopw 0(%[re]ax,%[re]ax,1) */
1278 static const unsigned char alt_6[] =
1279   {0x66,0x0f,0x1f,0x44,0x00,0x00};
1280 /* nopl 0L(%[re]ax) */
1281 static const unsigned char alt_7[] =
1282   {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
1283 /* nopl 0L(%[re]ax,%[re]ax,1) */
1284 static const unsigned char alt_8[] =
1285   {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1286 /* nopw 0L(%[re]ax,%[re]ax,1) */
1287 static const unsigned char alt_9[] =
1288   {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1289 /* nopw %cs:0L(%[re]ax,%[re]ax,1) */
1290 static const unsigned char alt_10[] =
1291   {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1292 /* data16 nopw %cs:0L(%eax,%eax,1) */
1293 static const unsigned char alt_11[] =
1294   {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1295 /* 32-bit and 64-bit NOPs patterns.  */
1296 static const unsigned char *const alt_patt[] = {
1297   f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
1298   alt_9, alt_10, alt_11
1299 };
1300
1301 /* Genenerate COUNT bytes of NOPs to WHERE from PATT with the maximum
1302    size of a single NOP instruction MAX_SINGLE_NOP_SIZE.  */
1303
1304 static void
1305 i386_output_nops (char *where, const unsigned char *const *patt,
1306                   int count, int max_single_nop_size)
1307
1308 {
1309   /* Place the longer NOP first.  */
1310   int last;
1311   int offset;
1312   const unsigned char *nops;
1313
1314   if (max_single_nop_size < 1)
1315     {
1316       as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
1317                 max_single_nop_size);
1318       return;
1319     }
1320
1321   nops = patt[max_single_nop_size - 1];
1322
1323   /* Use the smaller one if the requsted one isn't available.  */
1324   if (nops == NULL)
1325     {
1326       max_single_nop_size--;
1327       nops = patt[max_single_nop_size - 1];
1328     }
1329
1330   last = count % max_single_nop_size;
1331
1332   count -= last;
1333   for (offset = 0; offset < count; offset += max_single_nop_size)
1334     memcpy (where + offset, nops, max_single_nop_size);
1335
1336   if (last)
1337     {
1338       nops = patt[last - 1];
1339       if (nops == NULL)
1340         {
1341           /* Use the smaller one plus one-byte NOP if the needed one
1342              isn't available.  */
1343           last--;
1344           nops = patt[last - 1];
1345           memcpy (where + offset, nops, last);
1346           where[offset + last] = *patt[0];
1347         }
1348       else
1349         memcpy (where + offset, nops, last);
1350     }
1351 }
1352
1353 static INLINE int
1354 fits_in_imm7 (offsetT num)
1355 {
1356   return (num & 0x7f) == num;
1357 }
1358
1359 static INLINE int
1360 fits_in_imm31 (offsetT num)
1361 {
1362   return (num & 0x7fffffff) == num;
1363 }
1364
1365 /* Genenerate COUNT bytes of NOPs to WHERE with the maximum size of a
1366    single NOP instruction LIMIT.  */
1367
1368 void
1369 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1370 {
1371   const unsigned char *const *patt = NULL;
1372   int max_single_nop_size;
1373   /* Maximum number of NOPs before switching to jump over NOPs.  */
1374   int max_number_of_nops;
1375
1376   switch (fragP->fr_type)
1377     {
1378     case rs_fill_nop:
1379     case rs_align_code:
1380       break;
1381     case rs_machine_dependent:
1382       /* Allow NOP padding for jumps and calls.  */
1383       if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1384           || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1385         break;
1386       /* Fall through.  */
1387     default:
1388       return;
1389     }
1390
1391   /* We need to decide which NOP sequence to use for 32bit and
1392      64bit. When -mtune= is used:
1393
1394      1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
1395      PROCESSOR_GENERIC32, f32_patt will be used.
1396      2. For the rest, alt_patt will be used.
1397
1398      When -mtune= isn't used, alt_patt will be used if
1399      cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
1400      be used.
1401
1402      When -march= or .arch is used, we can't use anything beyond
1403      cpu_arch_isa_flags.   */
1404
1405   if (flag_code == CODE_16BIT)
1406     {
1407       patt = f16_patt;
1408       max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1409       /* Limit number of NOPs to 2 in 16-bit mode.  */
1410       max_number_of_nops = 2;
1411     }
1412   else
1413     {
1414       if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1415         {
1416           /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
1417           switch (cpu_arch_tune)
1418             {
1419             case PROCESSOR_UNKNOWN:
1420               /* We use cpu_arch_isa_flags to check if we SHOULD
1421                  optimize with nops.  */
1422               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1423                 patt = alt_patt;
1424               else
1425                 patt = f32_patt;
1426               break;
1427             case PROCESSOR_PENTIUM4:
1428             case PROCESSOR_NOCONA:
1429             case PROCESSOR_CORE:
1430             case PROCESSOR_CORE2:
1431             case PROCESSOR_COREI7:
1432             case PROCESSOR_GENERIC64:
1433             case PROCESSOR_K6:
1434             case PROCESSOR_ATHLON:
1435             case PROCESSOR_K8:
1436             case PROCESSOR_AMDFAM10:
1437             case PROCESSOR_BD:
1438             case PROCESSOR_ZNVER:
1439             case PROCESSOR_BT:
1440               patt = alt_patt;
1441               break;
1442             case PROCESSOR_I386:
1443             case PROCESSOR_I486:
1444             case PROCESSOR_PENTIUM:
1445             case PROCESSOR_PENTIUMPRO:
1446             case PROCESSOR_IAMCU:
1447             case PROCESSOR_GENERIC32:
1448               patt = f32_patt;
1449               break;
1450             case PROCESSOR_NONE:
1451               abort ();
1452             }
1453         }
1454       else
1455         {
1456           switch (fragP->tc_frag_data.tune)
1457             {
1458             case PROCESSOR_UNKNOWN:
1459               /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1460                  PROCESSOR_UNKNOWN.  */
1461               abort ();
1462               break;
1463
1464             case PROCESSOR_I386:
1465             case PROCESSOR_I486:
1466             case PROCESSOR_PENTIUM:
1467             case PROCESSOR_IAMCU:
1468             case PROCESSOR_K6:
1469             case PROCESSOR_ATHLON:
1470             case PROCESSOR_K8:
1471             case PROCESSOR_AMDFAM10:
1472             case PROCESSOR_BD:
1473             case PROCESSOR_ZNVER:
1474             case PROCESSOR_BT:
1475             case PROCESSOR_GENERIC32:
1476               /* We use cpu_arch_isa_flags to check if we CAN optimize
1477                  with nops.  */
1478               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1479                 patt = alt_patt;
1480               else
1481                 patt = f32_patt;
1482               break;
1483             case PROCESSOR_PENTIUMPRO:
1484             case PROCESSOR_PENTIUM4:
1485             case PROCESSOR_NOCONA:
1486             case PROCESSOR_CORE:
1487             case PROCESSOR_CORE2:
1488             case PROCESSOR_COREI7:
1489               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1490                 patt = alt_patt;
1491               else
1492                 patt = f32_patt;
1493               break;
1494             case PROCESSOR_GENERIC64:
1495               patt = alt_patt;
1496               break;
1497             case PROCESSOR_NONE:
1498               abort ();
1499             }
1500         }
1501
1502       if (patt == f32_patt)
1503         {
1504           max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
1505           /* Limit number of NOPs to 2 for older processors.  */
1506           max_number_of_nops = 2;
1507         }
1508       else
1509         {
1510           max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
1511           /* Limit number of NOPs to 7 for newer processors.  */
1512           max_number_of_nops = 7;
1513         }
1514     }
1515
1516   if (limit == 0)
1517     limit = max_single_nop_size;
1518
1519   if (fragP->fr_type == rs_fill_nop)
1520     {
1521       /* Output NOPs for .nop directive.  */
1522       if (limit > max_single_nop_size)
1523         {
1524           as_bad_where (fragP->fr_file, fragP->fr_line,
1525                         _("invalid single nop size: %d "
1526                           "(expect within [0, %d])"),
1527                         limit, max_single_nop_size);
1528           return;
1529         }
1530     }
1531   else if (fragP->fr_type != rs_machine_dependent)
1532     fragP->fr_var = count;
1533
1534   if ((count / max_single_nop_size) > max_number_of_nops)
1535     {
1536       /* Generate jump over NOPs.  */
1537       offsetT disp = count - 2;
1538       if (fits_in_imm7 (disp))
1539         {
1540           /* Use "jmp disp8" if possible.  */
1541           count = disp;
1542           where[0] = jump_disp8[0];
1543           where[1] = count;
1544           where += 2;
1545         }
1546       else
1547         {
1548           unsigned int size_of_jump;
1549
1550           if (flag_code == CODE_16BIT)
1551             {
1552               where[0] = jump16_disp32[0];
1553               where[1] = jump16_disp32[1];
1554               size_of_jump = 2;
1555             }
1556           else
1557             {
1558               where[0] = jump32_disp32[0];
1559               size_of_jump = 1;
1560             }
1561
1562           count -= size_of_jump + 4;
1563           if (!fits_in_imm31 (count))
1564             {
1565               as_bad_where (fragP->fr_file, fragP->fr_line,
1566                             _("jump over nop padding out of range"));
1567               return;
1568             }
1569
1570           md_number_to_chars (where + size_of_jump, count, 4);
1571           where += size_of_jump + 4;
1572         }
1573     }
1574
1575   /* Generate multiple NOPs.  */
1576   i386_output_nops (where, patt, count, limit);
1577 }
1578
1579 static INLINE int
1580 operand_type_all_zero (const union i386_operand_type *x)
1581 {
1582   switch (ARRAY_SIZE(x->array))
1583     {
1584     case 3:
1585       if (x->array[2])
1586         return 0;
1587       /* Fall through.  */
1588     case 2:
1589       if (x->array[1])
1590         return 0;
1591       /* Fall through.  */
1592     case 1:
1593       return !x->array[0];
1594     default:
1595       abort ();
1596     }
1597 }
1598
1599 static INLINE void
1600 operand_type_set (union i386_operand_type *x, unsigned int v)
1601 {
1602   switch (ARRAY_SIZE(x->array))
1603     {
1604     case 3:
1605       x->array[2] = v;
1606       /* Fall through.  */
1607     case 2:
1608       x->array[1] = v;
1609       /* Fall through.  */
1610     case 1:
1611       x->array[0] = v;
1612       /* Fall through.  */
1613       break;
1614     default:
1615       abort ();
1616     }
1617
1618   x->bitfield.class = ClassNone;
1619   x->bitfield.instance = InstanceNone;
1620 }
1621
1622 static INLINE int
1623 operand_type_equal (const union i386_operand_type *x,
1624                     const union i386_operand_type *y)
1625 {
1626   switch (ARRAY_SIZE(x->array))
1627     {
1628     case 3:
1629       if (x->array[2] != y->array[2])
1630         return 0;
1631       /* Fall through.  */
1632     case 2:
1633       if (x->array[1] != y->array[1])
1634         return 0;
1635       /* Fall through.  */
1636     case 1:
1637       return x->array[0] == y->array[0];
1638       break;
1639     default:
1640       abort ();
1641     }
1642 }
1643
1644 static INLINE int
1645 cpu_flags_all_zero (const union i386_cpu_flags *x)
1646 {
1647   switch (ARRAY_SIZE(x->array))
1648     {
1649     case 5:
1650       if (x->array[4])
1651         return 0;
1652       /* Fall through.  */
1653     case 4:
1654       if (x->array[3])
1655         return 0;
1656       /* Fall through.  */
1657     case 3:
1658       if (x->array[2])
1659         return 0;
1660       /* Fall through.  */
1661     case 2:
1662       if (x->array[1])
1663         return 0;
1664       /* Fall through.  */
1665     case 1:
1666       return !x->array[0];
1667     default:
1668       abort ();
1669     }
1670 }
1671
1672 static INLINE int
1673 cpu_flags_equal (const union i386_cpu_flags *x,
1674                  const union i386_cpu_flags *y)
1675 {
1676   switch (ARRAY_SIZE(x->array))
1677     {
1678     case 5:
1679       if (x->array[4] != y->array[4])
1680         return 0;
1681       /* Fall through.  */
1682     case 4:
1683       if (x->array[3] != y->array[3])
1684         return 0;
1685       /* Fall through.  */
1686     case 3:
1687       if (x->array[2] != y->array[2])
1688         return 0;
1689       /* Fall through.  */
1690     case 2:
1691       if (x->array[1] != y->array[1])
1692         return 0;
1693       /* Fall through.  */
1694     case 1:
1695       return x->array[0] == y->array[0];
1696       break;
1697     default:
1698       abort ();
1699     }
1700 }
1701
1702 static INLINE int
1703 cpu_flags_check_cpu64 (i386_cpu_flags f)
1704 {
1705   return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1706            || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1707 }
1708
1709 static INLINE i386_cpu_flags
1710 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1711 {
1712   switch (ARRAY_SIZE (x.array))
1713     {
1714     case 5:
1715       x.array [4] &= y.array [4];
1716       /* Fall through.  */
1717     case 4:
1718       x.array [3] &= y.array [3];
1719       /* Fall through.  */
1720     case 3:
1721       x.array [2] &= y.array [2];
1722       /* Fall through.  */
1723     case 2:
1724       x.array [1] &= y.array [1];
1725       /* Fall through.  */
1726     case 1:
1727       x.array [0] &= y.array [0];
1728       break;
1729     default:
1730       abort ();
1731     }
1732   return x;
1733 }
1734
1735 static INLINE i386_cpu_flags
1736 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1737 {
1738   switch (ARRAY_SIZE (x.array))
1739     {
1740     case 5:
1741       x.array [4] |= y.array [4];
1742       /* Fall through.  */
1743     case 4:
1744       x.array [3] |= y.array [3];
1745       /* Fall through.  */
1746     case 3:
1747       x.array [2] |= y.array [2];
1748       /* Fall through.  */
1749     case 2:
1750       x.array [1] |= y.array [1];
1751       /* Fall through.  */
1752     case 1:
1753       x.array [0] |= y.array [0];
1754       break;
1755     default:
1756       abort ();
1757     }
1758   return x;
1759 }
1760
1761 static INLINE i386_cpu_flags
1762 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1763 {
1764   switch (ARRAY_SIZE (x.array))
1765     {
1766     case 5:
1767       x.array [4] &= ~y.array [4];
1768       /* Fall through.  */
1769     case 4:
1770       x.array [3] &= ~y.array [3];
1771       /* Fall through.  */
1772     case 3:
1773       x.array [2] &= ~y.array [2];
1774       /* Fall through.  */
1775     case 2:
1776       x.array [1] &= ~y.array [1];
1777       /* Fall through.  */
1778     case 1:
1779       x.array [0] &= ~y.array [0];
1780       break;
1781     default:
1782       abort ();
1783     }
1784   return x;
1785 }
1786
1787 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
1788
1789 #define CPU_FLAGS_ARCH_MATCH            0x1
1790 #define CPU_FLAGS_64BIT_MATCH           0x2
1791
1792 #define CPU_FLAGS_PERFECT_MATCH \
1793   (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1794
1795 /* Return CPU flags match bits. */
1796
1797 static int
1798 cpu_flags_match (const insn_template *t)
1799 {
1800   i386_cpu_flags x = t->cpu_flags;
1801   int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1802
1803   x.bitfield.cpu64 = 0;
1804   x.bitfield.cpuno64 = 0;
1805
1806   if (cpu_flags_all_zero (&x))
1807     {
1808       /* This instruction is available on all archs.  */
1809       match |= CPU_FLAGS_ARCH_MATCH;
1810     }
1811   else
1812     {
1813       /* This instruction is available only on some archs.  */
1814       i386_cpu_flags cpu = cpu_arch_flags;
1815
1816       /* AVX512VL is no standalone feature - match it and then strip it.  */
1817       if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1818         return match;
1819       x.bitfield.cpuavx512vl = 0;
1820
1821       /* AVX and AVX2 present at the same time express an operand size
1822          dependency - strip AVX2 for the purposes here.  The operand size
1823          dependent check occurs in check_vecOperands().  */
1824       if (x.bitfield.cpuavx && x.bitfield.cpuavx2)
1825         x.bitfield.cpuavx2 = 0;
1826
1827       cpu = cpu_flags_and (x, cpu);
1828       if (!cpu_flags_all_zero (&cpu))
1829         {
1830           if (x.bitfield.cpuavx)
1831             {
1832               /* We need to check a few extra flags with AVX.  */
1833               if (cpu.bitfield.cpuavx
1834                   && (!t->opcode_modifier.sse2avx
1835                       || (sse2avx && !i.prefix[DATA_PREFIX]))
1836                   && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1837                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1838                   && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1839                 match |= CPU_FLAGS_ARCH_MATCH;
1840             }
1841           else if (x.bitfield.cpuavx512f)
1842             {
1843               /* We need to check a few extra flags with AVX512F.  */
1844               if (cpu.bitfield.cpuavx512f
1845                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1846                   && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1847                   && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1848                 match |= CPU_FLAGS_ARCH_MATCH;
1849             }
1850           else
1851             match |= CPU_FLAGS_ARCH_MATCH;
1852         }
1853     }
1854   return match;
1855 }
1856
1857 static INLINE i386_operand_type
1858 operand_type_and (i386_operand_type x, i386_operand_type y)
1859 {
1860   if (x.bitfield.class != y.bitfield.class)
1861     x.bitfield.class = ClassNone;
1862   if (x.bitfield.instance != y.bitfield.instance)
1863     x.bitfield.instance = InstanceNone;
1864
1865   switch (ARRAY_SIZE (x.array))
1866     {
1867     case 3:
1868       x.array [2] &= y.array [2];
1869       /* Fall through.  */
1870     case 2:
1871       x.array [1] &= y.array [1];
1872       /* Fall through.  */
1873     case 1:
1874       x.array [0] &= y.array [0];
1875       break;
1876     default:
1877       abort ();
1878     }
1879   return x;
1880 }
1881
1882 static INLINE i386_operand_type
1883 operand_type_and_not (i386_operand_type x, i386_operand_type y)
1884 {
1885   gas_assert (y.bitfield.class == ClassNone);
1886   gas_assert (y.bitfield.instance == InstanceNone);
1887
1888   switch (ARRAY_SIZE (x.array))
1889     {
1890     case 3:
1891       x.array [2] &= ~y.array [2];
1892       /* Fall through.  */
1893     case 2:
1894       x.array [1] &= ~y.array [1];
1895       /* Fall through.  */
1896     case 1:
1897       x.array [0] &= ~y.array [0];
1898       break;
1899     default:
1900       abort ();
1901     }
1902   return x;
1903 }
1904
1905 static INLINE i386_operand_type
1906 operand_type_or (i386_operand_type x, i386_operand_type y)
1907 {
1908   gas_assert (x.bitfield.class == ClassNone ||
1909               y.bitfield.class == ClassNone ||
1910               x.bitfield.class == y.bitfield.class);
1911   gas_assert (x.bitfield.instance == InstanceNone ||
1912               y.bitfield.instance == InstanceNone ||
1913               x.bitfield.instance == y.bitfield.instance);
1914
1915   switch (ARRAY_SIZE (x.array))
1916     {
1917     case 3:
1918       x.array [2] |= y.array [2];
1919       /* Fall through.  */
1920     case 2:
1921       x.array [1] |= y.array [1];
1922       /* Fall through.  */
1923     case 1:
1924       x.array [0] |= y.array [0];
1925       break;
1926     default:
1927       abort ();
1928     }
1929   return x;
1930 }
1931
1932 static INLINE i386_operand_type
1933 operand_type_xor (i386_operand_type x, i386_operand_type y)
1934 {
1935   gas_assert (y.bitfield.class == ClassNone);
1936   gas_assert (y.bitfield.instance == InstanceNone);
1937
1938   switch (ARRAY_SIZE (x.array))
1939     {
1940     case 3:
1941       x.array [2] ^= y.array [2];
1942       /* Fall through.  */
1943     case 2:
1944       x.array [1] ^= y.array [1];
1945       /* Fall through.  */
1946     case 1:
1947       x.array [0] ^= y.array [0];
1948       break;
1949     default:
1950       abort ();
1951     }
1952   return x;
1953 }
1954
1955 static const i386_operand_type disp16_32 = OPERAND_TYPE_DISP16_32;
1956 static const i386_operand_type anydisp = OPERAND_TYPE_ANYDISP;
1957 static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM;
1958 static const i386_operand_type imm16 = OPERAND_TYPE_IMM16;
1959 static const i386_operand_type imm32 = OPERAND_TYPE_IMM32;
1960 static const i386_operand_type imm32s = OPERAND_TYPE_IMM32S;
1961 static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32;
1962 static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S;
1963 static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S;
1964
1965 enum operand_type
1966 {
1967   reg,
1968   imm,
1969   disp,
1970   anymem
1971 };
1972
1973 static INLINE int
1974 operand_type_check (i386_operand_type t, enum operand_type c)
1975 {
1976   switch (c)
1977     {
1978     case reg:
1979       return t.bitfield.class == Reg;
1980
1981     case imm:
1982       return (t.bitfield.imm8
1983               || t.bitfield.imm8s
1984               || t.bitfield.imm16
1985               || t.bitfield.imm32
1986               || t.bitfield.imm32s
1987               || t.bitfield.imm64);
1988
1989     case disp:
1990       return (t.bitfield.disp8
1991               || t.bitfield.disp16
1992               || t.bitfield.disp32
1993               || t.bitfield.disp64);
1994
1995     case anymem:
1996       return (t.bitfield.disp8
1997               || t.bitfield.disp16
1998               || t.bitfield.disp32
1999               || t.bitfield.disp64
2000               || t.bitfield.baseindex);
2001
2002     default:
2003       abort ();
2004     }
2005
2006   return 0;
2007 }
2008
2009 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2010    between operand GIVEN and opeand WANTED for instruction template T.  */
2011
2012 static INLINE int
2013 match_operand_size (const insn_template *t, unsigned int wanted,
2014                     unsigned int given)
2015 {
2016   return !((i.types[given].bitfield.byte
2017             && !t->operand_types[wanted].bitfield.byte)
2018            || (i.types[given].bitfield.word
2019                && !t->operand_types[wanted].bitfield.word)
2020            || (i.types[given].bitfield.dword
2021                && !t->operand_types[wanted].bitfield.dword)
2022            || (i.types[given].bitfield.qword
2023                && !t->operand_types[wanted].bitfield.qword)
2024            || (i.types[given].bitfield.tbyte
2025                && !t->operand_types[wanted].bitfield.tbyte));
2026 }
2027
2028 /* Return 1 if there is no conflict in SIMD register between operand
2029    GIVEN and opeand WANTED for instruction template T.  */
2030
2031 static INLINE int
2032 match_simd_size (const insn_template *t, unsigned int wanted,
2033                  unsigned int given)
2034 {
2035   return !((i.types[given].bitfield.xmmword
2036             && !t->operand_types[wanted].bitfield.xmmword)
2037            || (i.types[given].bitfield.ymmword
2038                && !t->operand_types[wanted].bitfield.ymmword)
2039            || (i.types[given].bitfield.zmmword
2040                && !t->operand_types[wanted].bitfield.zmmword)
2041            || (i.types[given].bitfield.tmmword
2042                && !t->operand_types[wanted].bitfield.tmmword));
2043 }
2044
2045 /* Return 1 if there is no conflict in any size between operand GIVEN
2046    and opeand WANTED for instruction template T.  */
2047
2048 static INLINE int
2049 match_mem_size (const insn_template *t, unsigned int wanted,
2050                 unsigned int given)
2051 {
2052   return (match_operand_size (t, wanted, given)
2053           && !((i.types[given].bitfield.unspecified
2054                 && !i.broadcast.type
2055                 && !i.broadcast.bytes
2056                 && !t->operand_types[wanted].bitfield.unspecified)
2057                || (i.types[given].bitfield.fword
2058                    && !t->operand_types[wanted].bitfield.fword)
2059                /* For scalar opcode templates to allow register and memory
2060                   operands at the same time, some special casing is needed
2061                   here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2062                   down-conversion vpmov*.  */
2063                || ((t->operand_types[wanted].bitfield.class == RegSIMD
2064                     && t->operand_types[wanted].bitfield.byte
2065                        + t->operand_types[wanted].bitfield.word
2066                        + t->operand_types[wanted].bitfield.dword
2067                        + t->operand_types[wanted].bitfield.qword
2068                        > !!t->opcode_modifier.broadcast)
2069                    ? (i.types[given].bitfield.xmmword
2070                       || i.types[given].bitfield.ymmword
2071                       || i.types[given].bitfield.zmmword)
2072                    : !match_simd_size(t, wanted, given))));
2073 }
2074
2075 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2076    operands for instruction template T, and it has MATCH_REVERSE set if there
2077    is no size conflict on any operands for the template with operands reversed
2078    (and the template allows for reversing in the first place).  */
2079
2080 #define MATCH_STRAIGHT 1
2081 #define MATCH_REVERSE  2
2082
2083 static INLINE unsigned int
2084 operand_size_match (const insn_template *t)
2085 {
2086   unsigned int j, match = MATCH_STRAIGHT;
2087
2088   /* Don't check non-absolute jump instructions.  */
2089   if (t->opcode_modifier.jump
2090       && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2091     return match;
2092
2093   /* Check memory and accumulator operand size.  */
2094   for (j = 0; j < i.operands; j++)
2095     {
2096       if (i.types[j].bitfield.class != Reg
2097           && i.types[j].bitfield.class != RegSIMD
2098           && t->opcode_modifier.operandconstraint == ANY_SIZE)
2099         continue;
2100
2101       if (t->operand_types[j].bitfield.class == Reg
2102           && !match_operand_size (t, j, j))
2103         {
2104           match = 0;
2105           break;
2106         }
2107
2108       if (t->operand_types[j].bitfield.class == RegSIMD
2109           && !match_simd_size (t, j, j))
2110         {
2111           match = 0;
2112           break;
2113         }
2114
2115       if (t->operand_types[j].bitfield.instance == Accum
2116           && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2117         {
2118           match = 0;
2119           break;
2120         }
2121
2122       if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2123         {
2124           match = 0;
2125           break;
2126         }
2127     }
2128
2129   if (!t->opcode_modifier.d)
2130     return match;
2131
2132   /* Check reverse.  */
2133   gas_assert ((i.operands >= 2 && i.operands <= 3)
2134               || t->opcode_modifier.vexsources);
2135
2136   for (j = 0; j < i.operands; j++)
2137     {
2138       unsigned int given = i.operands - j - 1;
2139
2140       /* For 4- and 5-operand insns VEX.W controls just the first two
2141          register operands.  */
2142       if (t->opcode_modifier.vexsources)
2143         given = j < 2 ? 1 - j : j;
2144
2145       if (t->operand_types[j].bitfield.class == Reg
2146           && !match_operand_size (t, j, given))
2147         return match;
2148
2149       if (t->operand_types[j].bitfield.class == RegSIMD
2150           && !match_simd_size (t, j, given))
2151         return match;
2152
2153       if (t->operand_types[j].bitfield.instance == Accum
2154           && (!match_operand_size (t, j, given)
2155               || !match_simd_size (t, j, given)))
2156         return match;
2157
2158       if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2159         return match;
2160     }
2161
2162   return match | MATCH_REVERSE;
2163 }
2164
2165 static INLINE int
2166 operand_type_match (i386_operand_type overlap,
2167                     i386_operand_type given)
2168 {
2169   i386_operand_type temp = overlap;
2170
2171   temp.bitfield.unspecified = 0;
2172   temp.bitfield.byte = 0;
2173   temp.bitfield.word = 0;
2174   temp.bitfield.dword = 0;
2175   temp.bitfield.fword = 0;
2176   temp.bitfield.qword = 0;
2177   temp.bitfield.tbyte = 0;
2178   temp.bitfield.xmmword = 0;
2179   temp.bitfield.ymmword = 0;
2180   temp.bitfield.zmmword = 0;
2181   temp.bitfield.tmmword = 0;
2182   if (operand_type_all_zero (&temp))
2183     goto mismatch;
2184
2185   if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2186     return 1;
2187
2188  mismatch:
2189   i.error = operand_type_mismatch;
2190   return 0;
2191 }
2192
2193 /* If given types g0 and g1 are registers they must be of the same type
2194    unless the expected operand type register overlap is null.
2195    Intel syntax sized memory operands are also checked here.  */
2196
2197 static INLINE int
2198 operand_type_register_match (i386_operand_type g0,
2199                              i386_operand_type t0,
2200                              i386_operand_type g1,
2201                              i386_operand_type t1)
2202 {
2203   if (g0.bitfield.class != Reg
2204       && g0.bitfield.class != RegSIMD
2205       && (g0.bitfield.unspecified
2206           || !operand_type_check (g0, anymem)))
2207     return 1;
2208
2209   if (g1.bitfield.class != Reg
2210       && g1.bitfield.class != RegSIMD
2211       && (g1.bitfield.unspecified
2212           || !operand_type_check (g1, anymem)))
2213     return 1;
2214
2215   if (g0.bitfield.byte == g1.bitfield.byte
2216       && g0.bitfield.word == g1.bitfield.word
2217       && g0.bitfield.dword == g1.bitfield.dword
2218       && g0.bitfield.qword == g1.bitfield.qword
2219       && g0.bitfield.xmmword == g1.bitfield.xmmword
2220       && g0.bitfield.ymmword == g1.bitfield.ymmword
2221       && g0.bitfield.zmmword == g1.bitfield.zmmword)
2222     return 1;
2223
2224   /* If expectations overlap in no more than a single size, all is fine. */
2225   g0 = operand_type_and (t0, t1);
2226   if (g0.bitfield.byte
2227       + g0.bitfield.word
2228       + g0.bitfield.dword
2229       + g0.bitfield.qword
2230       + g0.bitfield.xmmword
2231       + g0.bitfield.ymmword
2232       + g0.bitfield.zmmword <= 1)
2233     return 1;
2234
2235   i.error = register_type_mismatch;
2236
2237   return 0;
2238 }
2239
2240 static INLINE unsigned int
2241 register_number (const reg_entry *r)
2242 {
2243   unsigned int nr = r->reg_num;
2244
2245   if (r->reg_flags & RegRex)
2246     nr += 8;
2247
2248   if (r->reg_flags & RegVRex)
2249     nr += 16;
2250
2251   return nr;
2252 }
2253
2254 static INLINE unsigned int
2255 mode_from_disp_size (i386_operand_type t)
2256 {
2257   if (t.bitfield.disp8)
2258     return 1;
2259   else if (t.bitfield.disp16
2260            || t.bitfield.disp32)
2261     return 2;
2262   else
2263     return 0;
2264 }
2265
2266 static INLINE int
2267 fits_in_signed_byte (addressT num)
2268 {
2269   return num + 0x80 <= 0xff;
2270 }
2271
2272 static INLINE int
2273 fits_in_unsigned_byte (addressT num)
2274 {
2275   return num <= 0xff;
2276 }
2277
2278 static INLINE int
2279 fits_in_unsigned_word (addressT num)
2280 {
2281   return num <= 0xffff;
2282 }
2283
2284 static INLINE int
2285 fits_in_signed_word (addressT num)
2286 {
2287   return num + 0x8000 <= 0xffff;
2288 }
2289
2290 static INLINE int
2291 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2292 {
2293 #ifndef BFD64
2294   return 1;
2295 #else
2296   return num + 0x80000000 <= 0xffffffff;
2297 #endif
2298 }                               /* fits_in_signed_long() */
2299
2300 static INLINE int
2301 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2302 {
2303 #ifndef BFD64
2304   return 1;
2305 #else
2306   return num <= 0xffffffff;
2307 #endif
2308 }                               /* fits_in_unsigned_long() */
2309
2310 static INLINE valueT extend_to_32bit_address (addressT num)
2311 {
2312 #ifdef BFD64
2313   if (fits_in_unsigned_long(num))
2314     return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2315
2316   if (!fits_in_signed_long (num))
2317     return num & 0xffffffff;
2318 #endif
2319
2320   return num;
2321 }
2322
2323 static INLINE int
2324 fits_in_disp8 (offsetT num)
2325 {
2326   int shift = i.memshift;
2327   unsigned int mask;
2328
2329   if (shift == -1)
2330     abort ();
2331
2332   mask = (1 << shift) - 1;
2333
2334   /* Return 0 if NUM isn't properly aligned.  */
2335   if ((num & mask))
2336     return 0;
2337
2338   /* Check if NUM will fit in 8bit after shift.  */
2339   return fits_in_signed_byte (num >> shift);
2340 }
2341
2342 static INLINE int
2343 fits_in_imm4 (offsetT num)
2344 {
2345   return (num & 0xf) == num;
2346 }
2347
2348 static i386_operand_type
2349 smallest_imm_type (offsetT num)
2350 {
2351   i386_operand_type t;
2352
2353   operand_type_set (&t, 0);
2354   t.bitfield.imm64 = 1;
2355
2356   if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2357     {
2358       /* This code is disabled on the 486 because all the Imm1 forms
2359          in the opcode table are slower on the i486.  They're the
2360          versions with the implicitly specified single-position
2361          displacement, which has another syntax if you really want to
2362          use that form.  */
2363       t.bitfield.imm1 = 1;
2364       t.bitfield.imm8 = 1;
2365       t.bitfield.imm8s = 1;
2366       t.bitfield.imm16 = 1;
2367       t.bitfield.imm32 = 1;
2368       t.bitfield.imm32s = 1;
2369     }
2370   else if (fits_in_signed_byte (num))
2371     {
2372       t.bitfield.imm8 = 1;
2373       t.bitfield.imm8s = 1;
2374       t.bitfield.imm16 = 1;
2375       t.bitfield.imm32 = 1;
2376       t.bitfield.imm32s = 1;
2377     }
2378   else if (fits_in_unsigned_byte (num))
2379     {
2380       t.bitfield.imm8 = 1;
2381       t.bitfield.imm16 = 1;
2382       t.bitfield.imm32 = 1;
2383       t.bitfield.imm32s = 1;
2384     }
2385   else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2386     {
2387       t.bitfield.imm16 = 1;
2388       t.bitfield.imm32 = 1;
2389       t.bitfield.imm32s = 1;
2390     }
2391   else if (fits_in_signed_long (num))
2392     {
2393       t.bitfield.imm32 = 1;
2394       t.bitfield.imm32s = 1;
2395     }
2396   else if (fits_in_unsigned_long (num))
2397     t.bitfield.imm32 = 1;
2398
2399   return t;
2400 }
2401
2402 static offsetT
2403 offset_in_range (offsetT val, int size)
2404 {
2405   addressT mask;
2406
2407   switch (size)
2408     {
2409     case 1: mask = ((addressT) 1 <<  8) - 1; break;
2410     case 2: mask = ((addressT) 1 << 16) - 1; break;
2411 #ifdef BFD64
2412     case 4: mask = ((addressT) 1 << 32) - 1; break;
2413 #endif
2414     case sizeof (val): return val;
2415     default: abort ();
2416     }
2417
2418   if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2419     as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2420              (uint64_t) val, (uint64_t) (val & mask));
2421
2422   return val & mask;
2423 }
2424
2425 enum PREFIX_GROUP
2426 {
2427   PREFIX_EXIST = 0,
2428   PREFIX_LOCK,
2429   PREFIX_REP,
2430   PREFIX_DS,
2431   PREFIX_OTHER
2432 };
2433
2434 /* Returns
2435    a. PREFIX_EXIST if attempting to add a prefix where one from the
2436    same class already exists.
2437    b. PREFIX_LOCK if lock prefix is added.
2438    c. PREFIX_REP if rep/repne prefix is added.
2439    d. PREFIX_DS if ds prefix is added.
2440    e. PREFIX_OTHER if other prefix is added.
2441  */
2442
2443 static enum PREFIX_GROUP
2444 add_prefix (unsigned int prefix)
2445 {
2446   enum PREFIX_GROUP ret = PREFIX_OTHER;
2447   unsigned int q;
2448
2449   if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2450       && flag_code == CODE_64BIT)
2451     {
2452       if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2453           || (i.prefix[REX_PREFIX] & prefix & REX_R)
2454           || (i.prefix[REX_PREFIX] & prefix & REX_X)
2455           || (i.prefix[REX_PREFIX] & prefix & REX_B))
2456         ret = PREFIX_EXIST;
2457       q = REX_PREFIX;
2458     }
2459   else
2460     {
2461       switch (prefix)
2462         {
2463         default:
2464           abort ();
2465
2466         case DS_PREFIX_OPCODE:
2467           ret = PREFIX_DS;
2468           /* Fall through.  */
2469         case CS_PREFIX_OPCODE:
2470         case ES_PREFIX_OPCODE:
2471         case FS_PREFIX_OPCODE:
2472         case GS_PREFIX_OPCODE:
2473         case SS_PREFIX_OPCODE:
2474           q = SEG_PREFIX;
2475           break;
2476
2477         case REPNE_PREFIX_OPCODE:
2478         case REPE_PREFIX_OPCODE:
2479           q = REP_PREFIX;
2480           ret = PREFIX_REP;
2481           break;
2482
2483         case LOCK_PREFIX_OPCODE:
2484           q = LOCK_PREFIX;
2485           ret = PREFIX_LOCK;
2486           break;
2487
2488         case FWAIT_OPCODE:
2489           q = WAIT_PREFIX;
2490           break;
2491
2492         case ADDR_PREFIX_OPCODE:
2493           q = ADDR_PREFIX;
2494           break;
2495
2496         case DATA_PREFIX_OPCODE:
2497           q = DATA_PREFIX;
2498           break;
2499         }
2500       if (i.prefix[q] != 0)
2501         ret = PREFIX_EXIST;
2502     }
2503
2504   if (ret)
2505     {
2506       if (!i.prefix[q])
2507         ++i.prefixes;
2508       i.prefix[q] |= prefix;
2509     }
2510   else
2511     as_bad (_("same type of prefix used twice"));
2512
2513   return ret;
2514 }
2515
2516 static void
2517 update_code_flag (int value, int check)
2518 {
2519   PRINTF_LIKE ((*as_error));
2520
2521   flag_code = (enum flag_code) value;
2522   if (flag_code == CODE_64BIT)
2523     {
2524       cpu_arch_flags.bitfield.cpu64 = 1;
2525       cpu_arch_flags.bitfield.cpuno64 = 0;
2526     }
2527   else
2528     {
2529       cpu_arch_flags.bitfield.cpu64 = 0;
2530       cpu_arch_flags.bitfield.cpuno64 = 1;
2531     }
2532   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
2533     {
2534       if (check)
2535         as_error = as_fatal;
2536       else
2537         as_error = as_bad;
2538       (*as_error) (_("64bit mode not supported on `%s'."),
2539                    cpu_arch_name ? cpu_arch_name : default_arch);
2540     }
2541   if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2542     {
2543       if (check)
2544         as_error = as_fatal;
2545       else
2546         as_error = as_bad;
2547       (*as_error) (_("32bit mode not supported on `%s'."),
2548                    cpu_arch_name ? cpu_arch_name : default_arch);
2549     }
2550   stackop_size = '\0';
2551 }
2552
2553 static void
2554 set_code_flag (int value)
2555 {
2556   update_code_flag (value, 0);
2557 }
2558
2559 static void
2560 set_16bit_gcc_code_flag (int new_code_flag)
2561 {
2562   flag_code = (enum flag_code) new_code_flag;
2563   if (flag_code != CODE_16BIT)
2564     abort ();
2565   cpu_arch_flags.bitfield.cpu64 = 0;
2566   cpu_arch_flags.bitfield.cpuno64 = 1;
2567   stackop_size = LONG_MNEM_SUFFIX;
2568 }
2569
2570 static void
2571 set_intel_syntax (int syntax_flag)
2572 {
2573   /* Find out if register prefixing is specified.  */
2574   int ask_naked_reg = 0;
2575
2576   SKIP_WHITESPACE ();
2577   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2578     {
2579       char *string;
2580       int e = get_symbol_name (&string);
2581
2582       if (strcmp (string, "prefix") == 0)
2583         ask_naked_reg = 1;
2584       else if (strcmp (string, "noprefix") == 0)
2585         ask_naked_reg = -1;
2586       else
2587         as_bad (_("bad argument to syntax directive."));
2588       (void) restore_line_pointer (e);
2589     }
2590   demand_empty_rest_of_line ();
2591
2592   intel_syntax = syntax_flag;
2593
2594   if (ask_naked_reg == 0)
2595     allow_naked_reg = (intel_syntax
2596                        && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2597   else
2598     allow_naked_reg = (ask_naked_reg < 0);
2599
2600   expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2601
2602   identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2603   identifier_chars['$'] = intel_syntax ? '$' : 0;
2604   register_prefix = allow_naked_reg ? "" : "%";
2605 }
2606
2607 static void
2608 set_intel_mnemonic (int mnemonic_flag)
2609 {
2610   intel_mnemonic = mnemonic_flag;
2611 }
2612
2613 static void
2614 set_allow_index_reg (int flag)
2615 {
2616   allow_index_reg = flag;
2617 }
2618
2619 static void
2620 set_check (int what)
2621 {
2622   enum check_kind *kind;
2623   const char *str;
2624
2625   if (what)
2626     {
2627       kind = &operand_check;
2628       str = "operand";
2629     }
2630   else
2631     {
2632       kind = &sse_check;
2633       str = "sse";
2634     }
2635
2636   SKIP_WHITESPACE ();
2637
2638   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2639     {
2640       char *string;
2641       int e = get_symbol_name (&string);
2642
2643       if (strcmp (string, "none") == 0)
2644         *kind = check_none;
2645       else if (strcmp (string, "warning") == 0)
2646         *kind = check_warning;
2647       else if (strcmp (string, "error") == 0)
2648         *kind = check_error;
2649       else
2650         as_bad (_("bad argument to %s_check directive."), str);
2651       (void) restore_line_pointer (e);
2652     }
2653   else
2654     as_bad (_("missing argument for %s_check directive"), str);
2655
2656   demand_empty_rest_of_line ();
2657 }
2658
2659 static void
2660 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2661                            i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2662 {
2663 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2664   static const char *arch;
2665
2666   /* Intel MCU is only supported on ELF.  */
2667   if (!IS_ELF)
2668     return;
2669
2670   if (!arch)
2671     {
2672       /* Use cpu_arch_name if it is set in md_parse_option.  Otherwise
2673          use default_arch.  */
2674       arch = cpu_arch_name;
2675       if (!arch)
2676         arch = default_arch;
2677     }
2678
2679   /* If we are targeting Intel MCU, we must enable it.  */
2680   if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2681       == new_flag.bitfield.cpuiamcu)
2682     return;
2683
2684   as_bad (_("`%s' is not supported on `%s'"), name, arch);
2685 #endif
2686 }
2687
2688 static void
2689 extend_cpu_sub_arch_name (const char *name)
2690 {
2691   if (cpu_sub_arch_name)
2692     cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
2693                                   ".", name, (const char *) NULL);
2694   else
2695     cpu_sub_arch_name = concat (".", name, (const char *) NULL);
2696 }
2697
2698 static void
2699 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2700 {
2701   typedef struct arch_stack_entry
2702   {
2703     const struct arch_stack_entry *prev;
2704     const char *name;
2705     char *sub_name;
2706     i386_cpu_flags flags;
2707     i386_cpu_flags isa_flags;
2708     enum processor_type isa;
2709     enum flag_code flag_code;
2710     char stackop_size;
2711     bool no_cond_jump_promotion;
2712   } arch_stack_entry;
2713   static const arch_stack_entry *arch_stack_top;
2714
2715   SKIP_WHITESPACE ();
2716
2717   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2718     {
2719       char *s;
2720       int e = get_symbol_name (&s);
2721       const char *string = s;
2722       unsigned int j = 0;
2723       i386_cpu_flags flags;
2724
2725       if (strcmp (string, "default") == 0)
2726         {
2727           if (strcmp (default_arch, "iamcu") == 0)
2728             string = default_arch;
2729           else
2730             {
2731               static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
2732
2733               cpu_arch_name = NULL;
2734               free (cpu_sub_arch_name);
2735               cpu_sub_arch_name = NULL;
2736               cpu_arch_flags = cpu_unknown_flags;
2737               if (flag_code == CODE_64BIT)
2738                 {
2739                   cpu_arch_flags.bitfield.cpu64 = 1;
2740                   cpu_arch_flags.bitfield.cpuno64 = 0;
2741                 }
2742               else
2743                 {
2744                   cpu_arch_flags.bitfield.cpu64 = 0;
2745                   cpu_arch_flags.bitfield.cpuno64 = 1;
2746                 }
2747               cpu_arch_isa = PROCESSOR_UNKNOWN;
2748               cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
2749               if (!cpu_arch_tune_set)
2750                 {
2751                   cpu_arch_tune = cpu_arch_isa;
2752                   cpu_arch_tune_flags = cpu_arch_isa_flags;
2753                 }
2754
2755               j = ARRAY_SIZE (cpu_arch) + 1;
2756             }
2757         }
2758       else if (strcmp (string, "push") == 0)
2759         {
2760           arch_stack_entry *top = XNEW (arch_stack_entry);
2761
2762           top->name = cpu_arch_name;
2763           if (cpu_sub_arch_name)
2764             top->sub_name = xstrdup (cpu_sub_arch_name);
2765           else
2766             top->sub_name = NULL;
2767           top->flags = cpu_arch_flags;
2768           top->isa = cpu_arch_isa;
2769           top->isa_flags = cpu_arch_isa_flags;
2770           top->flag_code = flag_code;
2771           top->stackop_size = stackop_size;
2772           top->no_cond_jump_promotion = no_cond_jump_promotion;
2773
2774           top->prev = arch_stack_top;
2775           arch_stack_top = top;
2776
2777           (void) restore_line_pointer (e);
2778           demand_empty_rest_of_line ();
2779           return;
2780         }
2781       else if (strcmp (string, "pop") == 0)
2782         {
2783           const arch_stack_entry *top = arch_stack_top;
2784
2785           if (!top)
2786             as_bad (_(".arch stack is empty"));
2787           else if (top->flag_code != flag_code
2788                    || top->stackop_size != stackop_size)
2789             {
2790               static const unsigned int bits[] = {
2791                 [CODE_16BIT] = 16,
2792                 [CODE_32BIT] = 32,
2793                 [CODE_64BIT] = 64,
2794               };
2795
2796               as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
2797                       bits[top->flag_code],
2798                       top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
2799             }
2800           else
2801             {
2802               arch_stack_top = top->prev;
2803
2804               cpu_arch_name = top->name;
2805               free (cpu_sub_arch_name);
2806               cpu_sub_arch_name = top->sub_name;
2807               cpu_arch_flags = top->flags;
2808               cpu_arch_isa = top->isa;
2809               cpu_arch_isa_flags = top->isa_flags;
2810               no_cond_jump_promotion = top->no_cond_jump_promotion;
2811
2812               XDELETE (top);
2813             }
2814
2815           (void) restore_line_pointer (e);
2816           demand_empty_rest_of_line ();
2817           return;
2818         }
2819
2820       for (; j < ARRAY_SIZE (cpu_arch); j++)
2821         {
2822           if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
2823              && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
2824             {
2825               if (*string != '.')
2826                 {
2827                   check_cpu_arch_compatible (string, cpu_arch[j].enable);
2828
2829                   cpu_arch_name = cpu_arch[j].name;
2830                   free (cpu_sub_arch_name);
2831                   cpu_sub_arch_name = NULL;
2832                   cpu_arch_flags = cpu_arch[j].enable;
2833                   if (flag_code == CODE_64BIT)
2834                     {
2835                       cpu_arch_flags.bitfield.cpu64 = 1;
2836                       cpu_arch_flags.bitfield.cpuno64 = 0;
2837                     }
2838                   else
2839                     {
2840                       cpu_arch_flags.bitfield.cpu64 = 0;
2841                       cpu_arch_flags.bitfield.cpuno64 = 1;
2842                     }
2843                   cpu_arch_isa = cpu_arch[j].type;
2844                   cpu_arch_isa_flags = cpu_arch[j].enable;
2845                   if (!cpu_arch_tune_set)
2846                     {
2847                       cpu_arch_tune = cpu_arch_isa;
2848                       cpu_arch_tune_flags = cpu_arch_isa_flags;
2849                     }
2850                   pre_386_16bit_warned = false;
2851                   break;
2852                 }
2853
2854               if (cpu_flags_all_zero (&cpu_arch[j].enable))
2855                 continue;
2856
2857               flags = cpu_flags_or (cpu_arch_flags,
2858                                     cpu_arch[j].enable);
2859
2860               if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2861                 {
2862                   extend_cpu_sub_arch_name (string + 1);
2863                   cpu_arch_flags = flags;
2864                   cpu_arch_isa_flags = flags;
2865                 }
2866               else
2867                 cpu_arch_isa_flags
2868                   = cpu_flags_or (cpu_arch_isa_flags,
2869                                   cpu_arch[j].enable);
2870               (void) restore_line_pointer (e);
2871               demand_empty_rest_of_line ();
2872               return;
2873             }
2874         }
2875
2876       if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
2877         {
2878           /* Disable an ISA extension.  */
2879           for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2880             if (cpu_arch[j].type == PROCESSOR_NONE
2881                 && strcmp (string + 3, cpu_arch[j].name) == 0)
2882               {
2883                 flags = cpu_flags_and_not (cpu_arch_flags,
2884                                            cpu_arch[j].disable);
2885                 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2886                   {
2887                     extend_cpu_sub_arch_name (string + 1);
2888                     cpu_arch_flags = flags;
2889                     cpu_arch_isa_flags = flags;
2890                   }
2891                 (void) restore_line_pointer (e);
2892                 demand_empty_rest_of_line ();
2893                 return;
2894               }
2895         }
2896
2897       if (j == ARRAY_SIZE (cpu_arch))
2898         as_bad (_("no such architecture: `%s'"), string);
2899
2900       *input_line_pointer = e;
2901     }
2902   else
2903     as_bad (_("missing cpu architecture"));
2904
2905   no_cond_jump_promotion = 0;
2906   if (*input_line_pointer == ','
2907       && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2908     {
2909       char *string;
2910       char e;
2911
2912       ++input_line_pointer;
2913       e = get_symbol_name (&string);
2914
2915       if (strcmp (string, "nojumps") == 0)
2916         no_cond_jump_promotion = 1;
2917       else if (strcmp (string, "jumps") == 0)
2918         ;
2919       else
2920         as_bad (_("no such architecture modifier: `%s'"), string);
2921
2922       (void) restore_line_pointer (e);
2923     }
2924
2925   demand_empty_rest_of_line ();
2926 }
2927
2928 enum bfd_architecture
2929 i386_arch (void)
2930 {
2931   if (cpu_arch_isa == PROCESSOR_IAMCU)
2932     {
2933       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2934           || flag_code == CODE_64BIT)
2935         as_fatal (_("Intel MCU is 32bit ELF only"));
2936       return bfd_arch_iamcu;
2937     }
2938   else
2939     return bfd_arch_i386;
2940 }
2941
2942 unsigned long
2943 i386_mach (void)
2944 {
2945   if (startswith (default_arch, "x86_64"))
2946     {
2947       if (default_arch[6] == '\0')
2948         return bfd_mach_x86_64;
2949       else
2950         return bfd_mach_x64_32;
2951     }
2952   else if (!strcmp (default_arch, "i386")
2953            || !strcmp (default_arch, "iamcu"))
2954     {
2955       if (cpu_arch_isa == PROCESSOR_IAMCU)
2956         {
2957           if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
2958             as_fatal (_("Intel MCU is 32bit ELF only"));
2959           return bfd_mach_i386_iamcu;
2960         }
2961       else
2962         return bfd_mach_i386_i386;
2963     }
2964   else
2965     as_fatal (_("unknown architecture"));
2966 }
2967 \f
2968 void
2969 md_begin (void)
2970 {
2971   /* Support pseudo prefixes like {disp32}.  */
2972   lex_type ['{'] = LEX_BEGIN_NAME;
2973
2974   /* Initialize op_hash hash table.  */
2975   op_hash = str_htab_create ();
2976
2977   {
2978     const insn_template *optab;
2979     templates *core_optab;
2980
2981     /* Setup for loop.  */
2982     optab = i386_optab;
2983     core_optab = notes_alloc (sizeof (*core_optab));
2984     core_optab->start = optab;
2985
2986     while (1)
2987       {
2988         ++optab;
2989         if (optab->name == NULL
2990             || strcmp (optab->name, (optab - 1)->name) != 0)
2991           {
2992             /* different name --> ship out current template list;
2993                add to hash table; & begin anew.  */
2994             core_optab->end = optab;
2995             if (str_hash_insert (op_hash, (optab - 1)->name, core_optab, 0))
2996               as_fatal (_("duplicate %s"), (optab - 1)->name);
2997
2998             if (optab->name == NULL)
2999               break;
3000             core_optab = notes_alloc (sizeof (*core_optab));
3001             core_optab->start = optab;
3002           }
3003       }
3004   }
3005
3006   /* Initialize reg_hash hash table.  */
3007   reg_hash = str_htab_create ();
3008   {
3009     const reg_entry *regtab;
3010     unsigned int regtab_size = i386_regtab_size;
3011
3012     for (regtab = i386_regtab; regtab_size--; regtab++)
3013       {
3014         switch (regtab->reg_type.bitfield.class)
3015           {
3016           case Reg:
3017             if (regtab->reg_type.bitfield.dword)
3018               {
3019                 if (regtab->reg_type.bitfield.instance == Accum)
3020                   reg_eax = regtab;
3021               }
3022             else if (regtab->reg_type.bitfield.tbyte)
3023               {
3024                 /* There's no point inserting st(<N>) in the hash table, as
3025                    parentheses aren't included in register_chars[] anyway.  */
3026                 if (regtab->reg_type.bitfield.instance != Accum)
3027                   continue;
3028                 reg_st0 = regtab;
3029               }
3030             break;
3031
3032           case SReg:
3033             switch (regtab->reg_num)
3034               {
3035               case 0: reg_es = regtab; break;
3036               case 2: reg_ss = regtab; break;
3037               case 3: reg_ds = regtab; break;
3038               }
3039             break;
3040
3041           case RegMask:
3042             if (!regtab->reg_num)
3043               reg_k0 = regtab;
3044             break;
3045           }
3046
3047         if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3048           as_fatal (_("duplicate %s"), regtab->reg_name);
3049       }
3050   }
3051
3052   /* Fill in lexical tables:  mnemonic_chars, operand_chars.  */
3053   {
3054     int c;
3055     char *p;
3056
3057     for (c = 0; c < 256; c++)
3058       {
3059         if (ISDIGIT (c) || ISLOWER (c))
3060           {
3061             mnemonic_chars[c] = c;
3062             register_chars[c] = c;
3063             operand_chars[c] = c;
3064           }
3065         else if (ISUPPER (c))
3066           {
3067             mnemonic_chars[c] = TOLOWER (c);
3068             register_chars[c] = mnemonic_chars[c];
3069             operand_chars[c] = c;
3070           }
3071         else if (c == '{' || c == '}')
3072           {
3073             mnemonic_chars[c] = c;
3074             operand_chars[c] = c;
3075           }
3076 #ifdef SVR4_COMMENT_CHARS
3077         else if (c == '\\' && strchr (i386_comment_chars, '/'))
3078           operand_chars[c] = c;
3079 #endif
3080
3081         if (ISALPHA (c) || ISDIGIT (c))
3082           identifier_chars[c] = c;
3083         else if (c >= 128)
3084           {
3085             identifier_chars[c] = c;
3086             operand_chars[c] = c;
3087           }
3088       }
3089
3090 #ifdef LEX_AT
3091     identifier_chars['@'] = '@';
3092 #endif
3093 #ifdef LEX_QM
3094     identifier_chars['?'] = '?';
3095     operand_chars['?'] = '?';
3096 #endif
3097     mnemonic_chars['_'] = '_';
3098     mnemonic_chars['-'] = '-';
3099     mnemonic_chars['.'] = '.';
3100     identifier_chars['_'] = '_';
3101     identifier_chars['.'] = '.';
3102
3103     for (p = operand_special_chars; *p != '\0'; p++)
3104       operand_chars[(unsigned char) *p] = *p;
3105   }
3106
3107   if (flag_code == CODE_64BIT)
3108     {
3109 #if defined (OBJ_COFF) && defined (TE_PE)
3110       x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3111                                   ? 32 : 16);
3112 #else
3113       x86_dwarf2_return_column = 16;
3114 #endif
3115       x86_cie_data_alignment = -8;
3116 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3117       x86_sframe_cfa_sp_reg = 7;
3118       x86_sframe_cfa_fp_reg = 6;
3119 #endif
3120     }
3121   else
3122     {
3123       x86_dwarf2_return_column = 8;
3124       x86_cie_data_alignment = -4;
3125     }
3126
3127   /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3128      can be turned into BRANCH_PREFIX frag.  */
3129   if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3130     abort ();
3131 }
3132
3133 void
3134 i386_print_statistics (FILE *file)
3135 {
3136   htab_print_statistics (file, "i386 opcode", op_hash);
3137   htab_print_statistics (file, "i386 register", reg_hash);
3138 }
3139
3140 void
3141 i386_md_end (void)
3142 {
3143   htab_delete (op_hash);
3144   htab_delete (reg_hash);
3145 }
3146 \f
3147 #ifdef DEBUG386
3148
3149 /* Debugging routines for md_assemble.  */
3150 static void pte (insn_template *);
3151 static void pt (i386_operand_type);
3152 static void pe (expressionS *);
3153 static void ps (symbolS *);
3154
3155 static void
3156 pi (const char *line, i386_insn *x)
3157 {
3158   unsigned int j;
3159
3160   fprintf (stdout, "%s: template ", line);
3161   pte (&x->tm);
3162   fprintf (stdout, "  address: base %s  index %s  scale %x\n",
3163            x->base_reg ? x->base_reg->reg_name : "none",
3164            x->index_reg ? x->index_reg->reg_name : "none",
3165            x->log2_scale_factor);
3166   fprintf (stdout, "  modrm:  mode %x  reg %x  reg/mem %x\n",
3167            x->rm.mode, x->rm.reg, x->rm.regmem);
3168   fprintf (stdout, "  sib:  base %x  index %x  scale %x\n",
3169            x->sib.base, x->sib.index, x->sib.scale);
3170   fprintf (stdout, "  rex: 64bit %x  extX %x  extY %x  extZ %x\n",
3171            (x->rex & REX_W) != 0,
3172            (x->rex & REX_R) != 0,
3173            (x->rex & REX_X) != 0,
3174            (x->rex & REX_B) != 0);
3175   for (j = 0; j < x->operands; j++)
3176     {
3177       fprintf (stdout, "    #%d:  ", j + 1);
3178       pt (x->types[j]);
3179       fprintf (stdout, "\n");
3180       if (x->types[j].bitfield.class == Reg
3181           || x->types[j].bitfield.class == RegMMX
3182           || x->types[j].bitfield.class == RegSIMD
3183           || x->types[j].bitfield.class == RegMask
3184           || x->types[j].bitfield.class == SReg
3185           || x->types[j].bitfield.class == RegCR
3186           || x->types[j].bitfield.class == RegDR
3187           || x->types[j].bitfield.class == RegTR
3188           || x->types[j].bitfield.class == RegBND)
3189         fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3190       if (operand_type_check (x->types[j], imm))
3191         pe (x->op[j].imms);
3192       if (operand_type_check (x->types[j], disp))
3193         pe (x->op[j].disps);
3194     }
3195 }
3196
3197 static void
3198 pte (insn_template *t)
3199 {
3200   static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3201   static const char *const opc_spc[] = {
3202     NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3203     "XOP08", "XOP09", "XOP0A",
3204   };
3205   unsigned int j;
3206
3207   fprintf (stdout, " %d operands ", t->operands);
3208   if (opc_pfx[t->opcode_modifier.opcodeprefix])
3209     fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3210   if (opc_spc[t->opcode_modifier.opcodespace])
3211     fprintf (stdout, "space %s ", opc_spc[t->opcode_modifier.opcodespace]);
3212   fprintf (stdout, "opcode %x ", t->base_opcode);
3213   if (t->extension_opcode != None)
3214     fprintf (stdout, "ext %x ", t->extension_opcode);
3215   if (t->opcode_modifier.d)
3216     fprintf (stdout, "D");
3217   if (t->opcode_modifier.w)
3218     fprintf (stdout, "W");
3219   fprintf (stdout, "\n");
3220   for (j = 0; j < t->operands; j++)
3221     {
3222       fprintf (stdout, "    #%d type ", j + 1);
3223       pt (t->operand_types[j]);
3224       fprintf (stdout, "\n");
3225     }
3226 }
3227
3228 static void
3229 pe (expressionS *e)
3230 {
3231   fprintf (stdout, "    operation     %d\n", e->X_op);
3232   fprintf (stdout, "    add_number    %" PRId64 " (%" PRIx64 ")\n",
3233            (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3234   if (e->X_add_symbol)
3235     {
3236       fprintf (stdout, "    add_symbol    ");
3237       ps (e->X_add_symbol);
3238       fprintf (stdout, "\n");
3239     }
3240   if (e->X_op_symbol)
3241     {
3242       fprintf (stdout, "    op_symbol    ");
3243       ps (e->X_op_symbol);
3244       fprintf (stdout, "\n");
3245     }
3246 }
3247
3248 static void
3249 ps (symbolS *s)
3250 {
3251   fprintf (stdout, "%s type %s%s",
3252            S_GET_NAME (s),
3253            S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3254            segment_name (S_GET_SEGMENT (s)));
3255 }
3256
3257 static struct type_name
3258   {
3259     i386_operand_type mask;
3260     const char *name;
3261   }
3262 const type_names[] =
3263 {
3264   { OPERAND_TYPE_REG8, "r8" },
3265   { OPERAND_TYPE_REG16, "r16" },
3266   { OPERAND_TYPE_REG32, "r32" },
3267   { OPERAND_TYPE_REG64, "r64" },
3268   { OPERAND_TYPE_ACC8, "acc8" },
3269   { OPERAND_TYPE_ACC16, "acc16" },
3270   { OPERAND_TYPE_ACC32, "acc32" },
3271   { OPERAND_TYPE_ACC64, "acc64" },
3272   { OPERAND_TYPE_IMM8, "i8" },
3273   { OPERAND_TYPE_IMM8, "i8s" },
3274   { OPERAND_TYPE_IMM16, "i16" },
3275   { OPERAND_TYPE_IMM32, "i32" },
3276   { OPERAND_TYPE_IMM32S, "i32s" },
3277   { OPERAND_TYPE_IMM64, "i64" },
3278   { OPERAND_TYPE_IMM1, "i1" },
3279   { OPERAND_TYPE_BASEINDEX, "BaseIndex" },
3280   { OPERAND_TYPE_DISP8, "d8" },
3281   { OPERAND_TYPE_DISP16, "d16" },
3282   { OPERAND_TYPE_DISP32, "d32" },
3283   { OPERAND_TYPE_DISP64, "d64" },
3284   { OPERAND_TYPE_INOUTPORTREG, "InOutPortReg" },
3285   { OPERAND_TYPE_SHIFTCOUNT, "ShiftCount" },
3286   { OPERAND_TYPE_CONTROL, "control reg" },
3287   { OPERAND_TYPE_TEST, "test reg" },
3288   { OPERAND_TYPE_DEBUG, "debug reg" },
3289   { OPERAND_TYPE_FLOATREG, "FReg" },
3290   { OPERAND_TYPE_FLOATACC, "FAcc" },
3291   { OPERAND_TYPE_SREG, "SReg" },
3292   { OPERAND_TYPE_REGMMX, "rMMX" },
3293   { OPERAND_TYPE_REGXMM, "rXMM" },
3294   { OPERAND_TYPE_REGYMM, "rYMM" },
3295   { OPERAND_TYPE_REGZMM, "rZMM" },
3296   { OPERAND_TYPE_REGTMM, "rTMM" },
3297   { OPERAND_TYPE_REGMASK, "Mask reg" },
3298 };
3299
3300 static void
3301 pt (i386_operand_type t)
3302 {
3303   unsigned int j;
3304   i386_operand_type a;
3305
3306   for (j = 0; j < ARRAY_SIZE (type_names); j++)
3307     {
3308       a = operand_type_and (t, type_names[j].mask);
3309       if (operand_type_equal (&a, &type_names[j].mask))
3310         fprintf (stdout, "%s, ",  type_names[j].name);
3311     }
3312   fflush (stdout);
3313 }
3314
3315 #endif /* DEBUG386 */
3316 \f
3317 static bfd_reloc_code_real_type
3318 reloc (unsigned int size,
3319        int pcrel,
3320        int sign,
3321        bfd_reloc_code_real_type other)
3322 {
3323   if (other != NO_RELOC)
3324     {
3325       reloc_howto_type *rel;
3326
3327       if (size == 8)
3328         switch (other)
3329           {
3330           case BFD_RELOC_X86_64_GOT32:
3331             return BFD_RELOC_X86_64_GOT64;
3332             break;
3333           case BFD_RELOC_X86_64_GOTPLT64:
3334             return BFD_RELOC_X86_64_GOTPLT64;
3335             break;
3336           case BFD_RELOC_X86_64_PLTOFF64:
3337             return BFD_RELOC_X86_64_PLTOFF64;
3338             break;
3339           case BFD_RELOC_X86_64_GOTPC32:
3340             other = BFD_RELOC_X86_64_GOTPC64;
3341             break;
3342           case BFD_RELOC_X86_64_GOTPCREL:
3343             other = BFD_RELOC_X86_64_GOTPCREL64;
3344             break;
3345           case BFD_RELOC_X86_64_TPOFF32:
3346             other = BFD_RELOC_X86_64_TPOFF64;
3347             break;
3348           case BFD_RELOC_X86_64_DTPOFF32:
3349             other = BFD_RELOC_X86_64_DTPOFF64;
3350             break;
3351           default:
3352             break;
3353           }
3354
3355 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3356       if (other == BFD_RELOC_SIZE32)
3357         {
3358           if (size == 8)
3359             other = BFD_RELOC_SIZE64;
3360           if (pcrel)
3361             {
3362               as_bad (_("there are no pc-relative size relocations"));
3363               return NO_RELOC;
3364             }
3365         }
3366 #endif
3367
3368       /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless.  */
3369       if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3370         sign = -1;
3371
3372       rel = bfd_reloc_type_lookup (stdoutput, other);
3373       if (!rel)
3374         as_bad (_("unknown relocation (%u)"), other);
3375       else if (size != bfd_get_reloc_size (rel))
3376         as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3377                 bfd_get_reloc_size (rel),
3378                 size);
3379       else if (pcrel && !rel->pc_relative)
3380         as_bad (_("non-pc-relative relocation for pc-relative field"));
3381       else if ((rel->complain_on_overflow == complain_overflow_signed
3382                 && !sign)
3383                || (rel->complain_on_overflow == complain_overflow_unsigned
3384                    && sign > 0))
3385         as_bad (_("relocated field and relocation type differ in signedness"));
3386       else
3387         return other;
3388       return NO_RELOC;
3389     }
3390
3391   if (pcrel)
3392     {
3393       if (!sign)
3394         as_bad (_("there are no unsigned pc-relative relocations"));
3395       switch (size)
3396         {
3397         case 1: return BFD_RELOC_8_PCREL;
3398         case 2: return BFD_RELOC_16_PCREL;
3399         case 4: return BFD_RELOC_32_PCREL;
3400         case 8: return BFD_RELOC_64_PCREL;
3401         }
3402       as_bad (_("cannot do %u byte pc-relative relocation"), size);
3403     }
3404   else
3405     {
3406       if (sign > 0)
3407         switch (size)
3408           {
3409           case 4: return BFD_RELOC_X86_64_32S;
3410           }
3411       else
3412         switch (size)
3413           {
3414           case 1: return BFD_RELOC_8;
3415           case 2: return BFD_RELOC_16;
3416           case 4: return BFD_RELOC_32;
3417           case 8: return BFD_RELOC_64;
3418           }
3419       as_bad (_("cannot do %s %u byte relocation"),
3420               sign > 0 ? "signed" : "unsigned", size);
3421     }
3422
3423   return NO_RELOC;
3424 }
3425
3426 /* Here we decide which fixups can be adjusted to make them relative to
3427    the beginning of the section instead of the symbol.  Basically we need
3428    to make sure that the dynamic relocations are done correctly, so in
3429    some cases we force the original symbol to be used.  */
3430
3431 int
3432 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3433 {
3434 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3435   if (!IS_ELF)
3436     return 1;
3437
3438   /* Don't adjust pc-relative references to merge sections in 64-bit
3439      mode.  */
3440   if (use_rela_relocations
3441       && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3442       && fixP->fx_pcrel)
3443     return 0;
3444
3445   /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3446      and changed later by validate_fix.  */
3447   if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3448       && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3449     return 0;
3450
3451   /* Adjust_reloc_syms doesn't know about the GOT.  Need to keep symbol
3452      for size relocations.  */
3453   if (fixP->fx_r_type == BFD_RELOC_SIZE32
3454       || fixP->fx_r_type == BFD_RELOC_SIZE64
3455       || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3456       || fixP->fx_r_type == BFD_RELOC_386_GOT32
3457       || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3458       || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3459       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3460       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3461       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3462       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3463       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3464       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3465       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3466       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3467       || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3468       || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3469       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3470       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3471       || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3472       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3473       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3474       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3475       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3476       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3477       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3478       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3479       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3480       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3481       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3482       || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3483       || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3484     return 0;
3485 #endif
3486   return 1;
3487 }
3488
3489 static INLINE bool
3490 want_disp32 (const insn_template *t)
3491 {
3492   return flag_code != CODE_64BIT
3493          || i.prefix[ADDR_PREFIX]
3494          || (t->base_opcode == 0x8d
3495              && t->opcode_modifier.opcodespace == SPACE_BASE
3496              && (!i.types[1].bitfield.qword
3497                 || t->opcode_modifier.size == SIZE32));
3498 }
3499
3500 static int
3501 intel_float_operand (const char *mnemonic)
3502 {
3503   /* Note that the value returned is meaningful only for opcodes with (memory)
3504      operands, hence the code here is free to improperly handle opcodes that
3505      have no operands (for better performance and smaller code). */
3506
3507   if (mnemonic[0] != 'f')
3508     return 0; /* non-math */
3509
3510   switch (mnemonic[1])
3511     {
3512     /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
3513        the fs segment override prefix not currently handled because no
3514        call path can make opcodes without operands get here */
3515     case 'i':
3516       return 2 /* integer op */;
3517     case 'l':
3518       if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
3519         return 3; /* fldcw/fldenv */
3520       break;
3521     case 'n':
3522       if (mnemonic[2] != 'o' /* fnop */)
3523         return 3; /* non-waiting control op */
3524       break;
3525     case 'r':
3526       if (mnemonic[2] == 's')
3527         return 3; /* frstor/frstpm */
3528       break;
3529     case 's':
3530       if (mnemonic[2] == 'a')
3531         return 3; /* fsave */
3532       if (mnemonic[2] == 't')
3533         {
3534           switch (mnemonic[3])
3535             {
3536             case 'c': /* fstcw */
3537             case 'd': /* fstdw */
3538             case 'e': /* fstenv */
3539             case 's': /* fsts[gw] */
3540               return 3;
3541             }
3542         }
3543       break;
3544     case 'x':
3545       if (mnemonic[2] == 'r' || mnemonic[2] == 's')
3546         return 0; /* fxsave/fxrstor are not really math ops */
3547       break;
3548     }
3549
3550   return 1;
3551 }
3552
3553 static INLINE void
3554 install_template (const insn_template *t)
3555 {
3556   unsigned int l;
3557
3558   i.tm = *t;
3559
3560   /* Note that for pseudo prefixes this produces a length of 1. But for them
3561      the length isn't interesting at all.  */
3562   for (l = 1; l < 4; ++l)
3563     if (!(t->base_opcode >> (8 * l)))
3564       break;
3565
3566   i.opcode_length = l;
3567 }
3568
3569 /* Build the VEX prefix.  */
3570
3571 static void
3572 build_vex_prefix (const insn_template *t)
3573 {
3574   unsigned int register_specifier;
3575   unsigned int vector_length;
3576   unsigned int w;
3577
3578   /* Check register specifier.  */
3579   if (i.vex.register_specifier)
3580     {
3581       register_specifier =
3582         ~register_number (i.vex.register_specifier) & 0xf;
3583       gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3584     }
3585   else
3586     register_specifier = 0xf;
3587
3588   /* Use 2-byte VEX prefix by swapping destination and source operand
3589      if there are more than 1 register operand.  */
3590   if (i.reg_operands > 1
3591       && i.vec_encoding != vex_encoding_vex3
3592       && i.dir_encoding == dir_encoding_default
3593       && i.operands == i.reg_operands
3594       && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3595       && i.tm.opcode_modifier.opcodespace == SPACE_0F
3596       && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3597       && i.rex == REX_B)
3598     {
3599       unsigned int xchg = i.operands - 1;
3600       union i386_op temp_op;
3601       i386_operand_type temp_type;
3602
3603       temp_type = i.types[xchg];
3604       i.types[xchg] = i.types[0];
3605       i.types[0] = temp_type;
3606       temp_op = i.op[xchg];
3607       i.op[xchg] = i.op[0];
3608       i.op[0] = temp_op;
3609
3610       gas_assert (i.rm.mode == 3);
3611
3612       i.rex = REX_R;
3613       xchg = i.rm.regmem;
3614       i.rm.regmem = i.rm.reg;
3615       i.rm.reg = xchg;
3616
3617       if (i.tm.opcode_modifier.d)
3618         i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3619                             ? Opcode_ExtD : Opcode_SIMD_IntD;
3620       else /* Use the next insn.  */
3621         install_template (&t[1]);
3622     }
3623
3624   /* Use 2-byte VEX prefix by swapping commutative source operands if there
3625      are no memory operands and at least 3 register ones.  */
3626   if (i.reg_operands >= 3
3627       && i.vec_encoding != vex_encoding_vex3
3628       && i.reg_operands == i.operands - i.imm_operands
3629       && i.tm.opcode_modifier.vex
3630       && i.tm.opcode_modifier.commutative
3631       && (i.tm.opcode_modifier.sse2avx || optimize > 1)
3632       && i.rex == REX_B
3633       && i.vex.register_specifier
3634       && !(i.vex.register_specifier->reg_flags & RegRex))
3635     {
3636       unsigned int xchg = i.operands - i.reg_operands;
3637       union i386_op temp_op;
3638       i386_operand_type temp_type;
3639
3640       gas_assert (i.tm.opcode_modifier.opcodespace == SPACE_0F);
3641       gas_assert (!i.tm.opcode_modifier.sae);
3642       gas_assert (operand_type_equal (&i.types[i.operands - 2],
3643                                       &i.types[i.operands - 3]));
3644       gas_assert (i.rm.mode == 3);
3645
3646       temp_type = i.types[xchg];
3647       i.types[xchg] = i.types[xchg + 1];
3648       i.types[xchg + 1] = temp_type;
3649       temp_op = i.op[xchg];
3650       i.op[xchg] = i.op[xchg + 1];
3651       i.op[xchg + 1] = temp_op;
3652
3653       i.rex = 0;
3654       xchg = i.rm.regmem | 8;
3655       i.rm.regmem = ~register_specifier & 0xf;
3656       gas_assert (!(i.rm.regmem & 8));
3657       i.vex.register_specifier += xchg - i.rm.regmem;
3658       register_specifier = ~xchg & 0xf;
3659     }
3660
3661   if (i.tm.opcode_modifier.vex == VEXScalar)
3662     vector_length = avxscalar;
3663   else if (i.tm.opcode_modifier.vex == VEX256)
3664     vector_length = 1;
3665   else
3666     {
3667       unsigned int op;
3668
3669       /* Determine vector length from the last multi-length vector
3670          operand.  */
3671       vector_length = 0;
3672       for (op = t->operands; op--;)
3673         if (t->operand_types[op].bitfield.xmmword
3674             && t->operand_types[op].bitfield.ymmword
3675             && i.types[op].bitfield.ymmword)
3676           {
3677             vector_length = 1;
3678             break;
3679           }
3680     }
3681
3682   /* Check the REX.W bit and VEXW.  */
3683   if (i.tm.opcode_modifier.vexw == VEXWIG)
3684     w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3685   else if (i.tm.opcode_modifier.vexw)
3686     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3687   else
3688     w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3689
3690   /* Use 2-byte VEX prefix if possible.  */
3691   if (w == 0
3692       && i.vec_encoding != vex_encoding_vex3
3693       && i.tm.opcode_modifier.opcodespace == SPACE_0F
3694       && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3695     {
3696       /* 2-byte VEX prefix.  */
3697       unsigned int r;
3698
3699       i.vex.length = 2;
3700       i.vex.bytes[0] = 0xc5;
3701
3702       /* Check the REX.R bit.  */
3703       r = (i.rex & REX_R) ? 0 : 1;
3704       i.vex.bytes[1] = (r << 7
3705                         | register_specifier << 3
3706                         | vector_length << 2
3707                         | i.tm.opcode_modifier.opcodeprefix);
3708     }
3709   else
3710     {
3711       /* 3-byte VEX prefix.  */
3712       i.vex.length = 3;
3713
3714       switch (i.tm.opcode_modifier.opcodespace)
3715         {
3716         case SPACE_0F:
3717         case SPACE_0F38:
3718         case SPACE_0F3A:
3719           i.vex.bytes[0] = 0xc4;
3720           break;
3721         case SPACE_XOP08:
3722         case SPACE_XOP09:
3723         case SPACE_XOP0A:
3724           i.vex.bytes[0] = 0x8f;
3725           break;
3726         default:
3727           abort ();
3728         }
3729
3730       /* The high 3 bits of the second VEX byte are 1's compliment
3731          of RXB bits from REX.  */
3732       i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3733
3734       i.vex.bytes[2] = (w << 7
3735                         | register_specifier << 3
3736                         | vector_length << 2
3737                         | i.tm.opcode_modifier.opcodeprefix);
3738     }
3739 }
3740
3741 static INLINE bool
3742 is_evex_encoding (const insn_template *t)
3743 {
3744   return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3745          || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3746          || t->opcode_modifier.sae;
3747 }
3748
3749 static INLINE bool
3750 is_any_vex_encoding (const insn_template *t)
3751 {
3752   return t->opcode_modifier.vex || is_evex_encoding (t);
3753 }
3754
3755 static unsigned int
3756 get_broadcast_bytes (const insn_template *t, bool diag)
3757 {
3758   unsigned int op, bytes;
3759   const i386_operand_type *types;
3760
3761   if (i.broadcast.type)
3762     return i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
3763                                 * i.broadcast.type);
3764
3765   gas_assert (intel_syntax);
3766
3767   for (op = 0; op < t->operands; ++op)
3768     if (t->operand_types[op].bitfield.baseindex)
3769       break;
3770
3771   gas_assert (op < t->operands);
3772
3773   if (t->opcode_modifier.evex
3774       && t->opcode_modifier.evex != EVEXDYN)
3775     switch (i.broadcast.bytes)
3776       {
3777       case 1:
3778         if (t->operand_types[op].bitfield.word)
3779           return 2;
3780       /* Fall through.  */
3781       case 2:
3782         if (t->operand_types[op].bitfield.dword)
3783           return 4;
3784       /* Fall through.  */
3785       case 4:
3786         if (t->operand_types[op].bitfield.qword)
3787           return 8;
3788       /* Fall through.  */
3789       case 8:
3790         if (t->operand_types[op].bitfield.xmmword)
3791           return 16;
3792         if (t->operand_types[op].bitfield.ymmword)
3793           return 32;
3794         if (t->operand_types[op].bitfield.zmmword)
3795           return 64;
3796       /* Fall through.  */
3797       default:
3798         abort ();
3799       }
3800
3801   gas_assert (op + 1 < t->operands);
3802
3803   if (t->operand_types[op + 1].bitfield.xmmword
3804       + t->operand_types[op + 1].bitfield.ymmword
3805       + t->operand_types[op + 1].bitfield.zmmword > 1)
3806     {
3807       types = &i.types[op + 1];
3808       diag = false;
3809     }
3810   else /* Ambiguous - guess with a preference to non-AVX512VL forms.  */
3811     types = &t->operand_types[op];
3812
3813   if (types->bitfield.zmmword)
3814     bytes = 64;
3815   else if (types->bitfield.ymmword)
3816     bytes = 32;
3817   else
3818     bytes = 16;
3819
3820   if (diag)
3821     as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
3822              t->name, bytes * 8);
3823
3824   return bytes;
3825 }
3826
3827 /* Build the EVEX prefix.  */
3828
3829 static void
3830 build_evex_prefix (void)
3831 {
3832   unsigned int register_specifier, w;
3833   rex_byte vrex_used = 0;
3834
3835   /* Check register specifier.  */
3836   if (i.vex.register_specifier)
3837     {
3838       gas_assert ((i.vrex & REX_X) == 0);
3839
3840       register_specifier = i.vex.register_specifier->reg_num;
3841       if ((i.vex.register_specifier->reg_flags & RegRex))
3842         register_specifier += 8;
3843       /* The upper 16 registers are encoded in the fourth byte of the
3844          EVEX prefix.  */
3845       if (!(i.vex.register_specifier->reg_flags & RegVRex))
3846         i.vex.bytes[3] = 0x8;
3847       register_specifier = ~register_specifier & 0xf;
3848     }
3849   else
3850     {
3851       register_specifier = 0xf;
3852
3853       /* Encode upper 16 vector index register in the fourth byte of
3854          the EVEX prefix.  */
3855       if (!(i.vrex & REX_X))
3856         i.vex.bytes[3] = 0x8;
3857       else
3858         vrex_used |= REX_X;
3859     }
3860
3861   /* 4 byte EVEX prefix.  */
3862   i.vex.length = 4;
3863   i.vex.bytes[0] = 0x62;
3864
3865   /* The high 3 bits of the second EVEX byte are 1's compliment of RXB
3866      bits from REX.  */
3867   gas_assert (i.tm.opcode_modifier.opcodespace >= SPACE_0F);
3868   gas_assert (i.tm.opcode_modifier.opcodespace <= SPACE_EVEXMAP6);
3869   i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3870
3871   /* The fifth bit of the second EVEX byte is 1's compliment of the
3872      REX_R bit in VREX.  */
3873   if (!(i.vrex & REX_R))
3874     i.vex.bytes[1] |= 0x10;
3875   else
3876     vrex_used |= REX_R;
3877
3878   if ((i.reg_operands + i.imm_operands) == i.operands)
3879     {
3880       /* When all operands are registers, the REX_X bit in REX is not
3881          used.  We reuse it to encode the upper 16 registers, which is
3882          indicated by the REX_B bit in VREX.  The REX_X bit is encoded
3883          as 1's compliment.  */
3884       if ((i.vrex & REX_B))
3885         {
3886           vrex_used |= REX_B;
3887           i.vex.bytes[1] &= ~0x40;
3888         }
3889     }
3890
3891   /* EVEX instructions shouldn't need the REX prefix.  */
3892   i.vrex &= ~vrex_used;
3893   gas_assert (i.vrex == 0);
3894
3895   /* Check the REX.W bit and VEXW.  */
3896   if (i.tm.opcode_modifier.vexw == VEXWIG)
3897     w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3898   else if (i.tm.opcode_modifier.vexw)
3899     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3900   else
3901     w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3902
3903   /* The third byte of the EVEX prefix.  */
3904   i.vex.bytes[2] = ((w << 7)
3905                     | (register_specifier << 3)
3906                     | 4 /* Encode the U bit.  */
3907                     | i.tm.opcode_modifier.opcodeprefix);
3908
3909   /* The fourth byte of the EVEX prefix.  */
3910   /* The zeroing-masking bit.  */
3911   if (i.mask.reg && i.mask.zeroing)
3912     i.vex.bytes[3] |= 0x80;
3913
3914   /* Don't always set the broadcast bit if there is no RC.  */
3915   if (i.rounding.type == rc_none)
3916     {
3917       /* Encode the vector length.  */
3918       unsigned int vec_length;
3919
3920       if (!i.tm.opcode_modifier.evex
3921           || i.tm.opcode_modifier.evex == EVEXDYN)
3922         {
3923           unsigned int op;
3924
3925           /* Determine vector length from the last multi-length vector
3926              operand.  */
3927           for (op = i.operands; op--;)
3928             if (i.tm.operand_types[op].bitfield.xmmword
3929                 + i.tm.operand_types[op].bitfield.ymmword
3930                 + i.tm.operand_types[op].bitfield.zmmword > 1)
3931               {
3932                 if (i.types[op].bitfield.zmmword)
3933                   {
3934                     i.tm.opcode_modifier.evex = EVEX512;
3935                     break;
3936                   }
3937                 else if (i.types[op].bitfield.ymmword)
3938                   {
3939                     i.tm.opcode_modifier.evex = EVEX256;
3940                     break;
3941                   }
3942                 else if (i.types[op].bitfield.xmmword)
3943                   {
3944                     i.tm.opcode_modifier.evex = EVEX128;
3945                     break;
3946                   }
3947                 else if (i.broadcast.bytes && op == i.broadcast.operand)
3948                   {
3949                     switch (get_broadcast_bytes (&i.tm, true))
3950                       {
3951                         case 64:
3952                           i.tm.opcode_modifier.evex = EVEX512;
3953                           break;
3954                         case 32:
3955                           i.tm.opcode_modifier.evex = EVEX256;
3956                           break;
3957                         case 16:
3958                           i.tm.opcode_modifier.evex = EVEX128;
3959                           break;
3960                         default:
3961                           abort ();
3962                       }
3963                     break;
3964                   }
3965               }
3966
3967           if (op >= MAX_OPERANDS)
3968             abort ();
3969         }
3970
3971       switch (i.tm.opcode_modifier.evex)
3972         {
3973         case EVEXLIG: /* LL' is ignored */
3974           vec_length = evexlig << 5;
3975           break;
3976         case EVEX128:
3977           vec_length = 0 << 5;
3978           break;
3979         case EVEX256:
3980           vec_length = 1 << 5;
3981           break;
3982         case EVEX512:
3983           vec_length = 2 << 5;
3984           break;
3985         default:
3986           abort ();
3987           break;
3988         }
3989       i.vex.bytes[3] |= vec_length;
3990       /* Encode the broadcast bit.  */
3991       if (i.broadcast.bytes)
3992         i.vex.bytes[3] |= 0x10;
3993     }
3994   else if (i.rounding.type != saeonly)
3995     i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
3996   else
3997     i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3998
3999   if (i.mask.reg)
4000     i.vex.bytes[3] |= i.mask.reg->reg_num;
4001 }
4002
4003 static void
4004 process_immext (void)
4005 {
4006   expressionS *exp;
4007
4008   /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4009      which is coded in the same place as an 8-bit immediate field
4010      would be.  Here we fake an 8-bit immediate operand from the
4011      opcode suffix stored in tm.extension_opcode.
4012
4013      AVX instructions also use this encoding, for some of
4014      3 argument instructions.  */
4015
4016   gas_assert (i.imm_operands <= 1
4017               && (i.operands <= 2
4018                   || (is_any_vex_encoding (&i.tm)
4019                       && i.operands <= 4)));
4020
4021   exp = &im_expressions[i.imm_operands++];
4022   i.op[i.operands].imms = exp;
4023   i.types[i.operands].bitfield.imm8 = 1;
4024   i.operands++;
4025   exp->X_op = O_constant;
4026   exp->X_add_number = i.tm.extension_opcode;
4027   i.tm.extension_opcode = None;
4028 }
4029
4030
4031 static int
4032 check_hle (void)
4033 {
4034   switch (i.tm.opcode_modifier.prefixok)
4035     {
4036     default:
4037       abort ();
4038     case PrefixLock:
4039     case PrefixNone:
4040     case PrefixNoTrack:
4041     case PrefixRep:
4042       as_bad (_("invalid instruction `%s' after `%s'"),
4043               i.tm.name, i.hle_prefix);
4044       return 0;
4045     case PrefixHLELock:
4046       if (i.prefix[LOCK_PREFIX])
4047         return 1;
4048       as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4049       return 0;
4050     case PrefixHLEAny:
4051       return 1;
4052     case PrefixHLERelease:
4053       if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4054         {
4055           as_bad (_("instruction `%s' after `xacquire' not allowed"),
4056                   i.tm.name);
4057           return 0;
4058         }
4059       if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4060         {
4061           as_bad (_("memory destination needed for instruction `%s'"
4062                     " after `xrelease'"), i.tm.name);
4063           return 0;
4064         }
4065       return 1;
4066     }
4067 }
4068
4069 /* Encode aligned vector move as unaligned vector move.  */
4070
4071 static void
4072 encode_with_unaligned_vector_move (void)
4073 {
4074   switch (i.tm.base_opcode)
4075     {
4076     case 0x28:  /* Load instructions.  */
4077     case 0x29:  /* Store instructions.  */
4078       /* movaps/movapd/vmovaps/vmovapd.  */
4079       if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4080           && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4081         i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4082       break;
4083     case 0x6f:  /* Load instructions.  */
4084     case 0x7f:  /* Store instructions.  */
4085       /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4086       if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4087           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4088         i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4089       break;
4090     default:
4091       break;
4092     }
4093 }
4094
4095 /* Try the shortest encoding by shortening operand size.  */
4096
4097 static void
4098 optimize_encoding (void)
4099 {
4100   unsigned int j;
4101
4102   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4103       && i.tm.base_opcode == 0x8d)
4104     {
4105       /* Optimize: -O:
4106            lea symbol, %rN    -> mov $symbol, %rN
4107            lea (%rM), %rN     -> mov %rM, %rN
4108            lea (,%rM,1), %rN  -> mov %rM, %rN
4109
4110            and in 32-bit mode for 16-bit addressing
4111
4112            lea (%rM), %rN     -> movzx %rM, %rN
4113
4114            and in 64-bit mode zap 32-bit addressing in favor of using a
4115            32-bit (or less) destination.
4116        */
4117       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4118         {
4119           if (!i.op[1].regs->reg_type.bitfield.word)
4120             i.tm.opcode_modifier.size = SIZE32;
4121           i.prefix[ADDR_PREFIX] = 0;
4122         }
4123
4124       if (!i.index_reg && !i.base_reg)
4125         {
4126           /* Handle:
4127                lea symbol, %rN    -> mov $symbol, %rN
4128            */
4129           if (flag_code == CODE_64BIT)
4130             {
4131               /* Don't transform a relocation to a 16-bit one.  */
4132               if (i.op[0].disps
4133                   && i.op[0].disps->X_op != O_constant
4134                   && i.op[1].regs->reg_type.bitfield.word)
4135                 return;
4136
4137               if (!i.op[1].regs->reg_type.bitfield.qword
4138                   || i.tm.opcode_modifier.size == SIZE32)
4139                 {
4140                   i.tm.base_opcode = 0xb8;
4141                   i.tm.opcode_modifier.modrm = 0;
4142                   if (!i.op[1].regs->reg_type.bitfield.word)
4143                     i.types[0].bitfield.imm32 = 1;
4144                   else
4145                     {
4146                       i.tm.opcode_modifier.size = SIZE16;
4147                       i.types[0].bitfield.imm16 = 1;
4148                     }
4149                 }
4150               else
4151                 {
4152                   /* Subject to further optimization below.  */
4153                   i.tm.base_opcode = 0xc7;
4154                   i.tm.extension_opcode = 0;
4155                   i.types[0].bitfield.imm32s = 1;
4156                   i.types[0].bitfield.baseindex = 0;
4157                 }
4158             }
4159           /* Outside of 64-bit mode address and operand sizes have to match if
4160              a relocation is involved, as otherwise we wouldn't (currently) or
4161              even couldn't express the relocation correctly.  */
4162           else if (i.op[0].disps
4163                    && i.op[0].disps->X_op != O_constant
4164                    && ((!i.prefix[ADDR_PREFIX])
4165                        != (flag_code == CODE_32BIT
4166                            ? i.op[1].regs->reg_type.bitfield.dword
4167                            : i.op[1].regs->reg_type.bitfield.word)))
4168             return;
4169           /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4170              destination is going to grow encoding size.  */
4171           else if (flag_code == CODE_16BIT
4172                    && (optimize <= 1 || optimize_for_space)
4173                    && !i.prefix[ADDR_PREFIX]
4174                    && i.op[1].regs->reg_type.bitfield.dword)
4175             return;
4176           else
4177             {
4178               i.tm.base_opcode = 0xb8;
4179               i.tm.opcode_modifier.modrm = 0;
4180               if (i.op[1].regs->reg_type.bitfield.dword)
4181                 i.types[0].bitfield.imm32 = 1;
4182               else
4183                 i.types[0].bitfield.imm16 = 1;
4184
4185               if (i.op[0].disps
4186                   && i.op[0].disps->X_op == O_constant
4187                   && i.op[1].regs->reg_type.bitfield.dword
4188                   /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4189                      GCC 5. */
4190                   && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4191                 i.op[0].disps->X_add_number &= 0xffff;
4192             }
4193
4194           i.tm.operand_types[0] = i.types[0];
4195           i.imm_operands = 1;
4196           if (!i.op[0].imms)
4197             {
4198               i.op[0].imms = &im_expressions[0];
4199               i.op[0].imms->X_op = O_absent;
4200             }
4201         }
4202       else if (i.op[0].disps
4203                   && (i.op[0].disps->X_op != O_constant
4204                       || i.op[0].disps->X_add_number))
4205         return;
4206       else
4207         {
4208           /* Handle:
4209                lea (%rM), %rN     -> mov %rM, %rN
4210                lea (,%rM,1), %rN  -> mov %rM, %rN
4211                lea (%rM), %rN     -> movzx %rM, %rN
4212            */
4213           const reg_entry *addr_reg;
4214
4215           if (!i.index_reg && i.base_reg->reg_num != RegIP)
4216             addr_reg = i.base_reg;
4217           else if (!i.base_reg
4218                    && i.index_reg->reg_num != RegIZ
4219                    && !i.log2_scale_factor)
4220             addr_reg = i.index_reg;
4221           else
4222             return;
4223
4224           if (addr_reg->reg_type.bitfield.word
4225               && i.op[1].regs->reg_type.bitfield.dword)
4226             {
4227               if (flag_code != CODE_32BIT)
4228                 return;
4229               i.tm.opcode_modifier.opcodespace = SPACE_0F;
4230               i.tm.base_opcode = 0xb7;
4231             }
4232           else
4233             i.tm.base_opcode = 0x8b;
4234
4235           if (addr_reg->reg_type.bitfield.dword
4236               && i.op[1].regs->reg_type.bitfield.qword)
4237             i.tm.opcode_modifier.size = SIZE32;
4238
4239           i.op[0].regs = addr_reg;
4240           i.reg_operands = 2;
4241         }
4242
4243       i.mem_operands = 0;
4244       i.disp_operands = 0;
4245       i.prefix[ADDR_PREFIX] = 0;
4246       i.prefix[SEG_PREFIX] = 0;
4247       i.seg[0] = NULL;
4248     }
4249
4250   if (optimize_for_space
4251       && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4252       && i.reg_operands == 1
4253       && i.imm_operands == 1
4254       && !i.types[1].bitfield.byte
4255       && i.op[0].imms->X_op == O_constant
4256       && fits_in_imm7 (i.op[0].imms->X_add_number)
4257       && (i.tm.base_opcode == 0xa8
4258           || (i.tm.base_opcode == 0xf6
4259               && i.tm.extension_opcode == 0x0)))
4260     {
4261       /* Optimize: -Os:
4262            test $imm7, %r64/%r32/%r16  -> test $imm7, %r8
4263        */
4264       unsigned int base_regnum = i.op[1].regs->reg_num;
4265       if (flag_code == CODE_64BIT || base_regnum < 4)
4266         {
4267           i.types[1].bitfield.byte = 1;
4268           /* Ignore the suffix.  */
4269           i.suffix = 0;
4270           /* Convert to byte registers.  */
4271           if (i.types[1].bitfield.word)
4272             j = 16;
4273           else if (i.types[1].bitfield.dword)
4274             j = 32;
4275           else
4276             j = 48;
4277           if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4278             j += 8;
4279           i.op[1].regs -= j;
4280         }
4281     }
4282   else if (flag_code == CODE_64BIT
4283            && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4284            && ((i.types[1].bitfield.qword
4285                 && i.reg_operands == 1
4286                 && i.imm_operands == 1
4287                 && i.op[0].imms->X_op == O_constant
4288                 && ((i.tm.base_opcode == 0xb8
4289                      && i.tm.extension_opcode == None
4290                      && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4291                     || (fits_in_imm31 (i.op[0].imms->X_add_number)
4292                         && ((i.tm.base_opcode == 0x24
4293                              || i.tm.base_opcode == 0xa8)
4294                             || (i.tm.base_opcode == 0x80
4295                                 && i.tm.extension_opcode == 0x4)
4296                             || ((i.tm.base_opcode == 0xf6
4297                                  || (i.tm.base_opcode | 1) == 0xc7)
4298                                 && i.tm.extension_opcode == 0x0)))
4299                     || (fits_in_imm7 (i.op[0].imms->X_add_number)
4300                         && i.tm.base_opcode == 0x83
4301                         && i.tm.extension_opcode == 0x4)))
4302                || (i.types[0].bitfield.qword
4303                    && ((i.reg_operands == 2
4304                         && i.op[0].regs == i.op[1].regs
4305                         && (i.tm.base_opcode == 0x30
4306                             || i.tm.base_opcode == 0x28))
4307                        || (i.reg_operands == 1
4308                            && i.operands == 1
4309                            && i.tm.base_opcode == 0x30)))))
4310     {
4311       /* Optimize: -O:
4312            andq $imm31, %r64   -> andl $imm31, %r32
4313            andq $imm7, %r64    -> andl $imm7, %r32
4314            testq $imm31, %r64  -> testl $imm31, %r32
4315            xorq %r64, %r64     -> xorl %r32, %r32
4316            subq %r64, %r64     -> subl %r32, %r32
4317            movq $imm31, %r64   -> movl $imm31, %r32
4318            movq $imm32, %r64   -> movl $imm32, %r32
4319         */
4320       i.tm.opcode_modifier.norex64 = 1;
4321       if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
4322         {
4323           /* Handle
4324                movq $imm31, %r64   -> movl $imm31, %r32
4325                movq $imm32, %r64   -> movl $imm32, %r32
4326            */
4327           i.tm.operand_types[0].bitfield.imm32 = 1;
4328           i.tm.operand_types[0].bitfield.imm32s = 0;
4329           i.tm.operand_types[0].bitfield.imm64 = 0;
4330           i.types[0].bitfield.imm32 = 1;
4331           i.types[0].bitfield.imm32s = 0;
4332           i.types[0].bitfield.imm64 = 0;
4333           i.types[1].bitfield.dword = 1;
4334           i.types[1].bitfield.qword = 0;
4335           if ((i.tm.base_opcode | 1) == 0xc7)
4336             {
4337               /* Handle
4338                    movq $imm31, %r64   -> movl $imm31, %r32
4339                */
4340               i.tm.base_opcode = 0xb8;
4341               i.tm.extension_opcode = None;
4342               i.tm.opcode_modifier.w = 0;
4343               i.tm.opcode_modifier.modrm = 0;
4344             }
4345         }
4346     }
4347   else if (optimize > 1
4348            && !optimize_for_space
4349            && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4350            && i.reg_operands == 2
4351            && i.op[0].regs == i.op[1].regs
4352            && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
4353                || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
4354            && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4355     {
4356       /* Optimize: -O2:
4357            andb %rN, %rN  -> testb %rN, %rN
4358            andw %rN, %rN  -> testw %rN, %rN
4359            andq %rN, %rN  -> testq %rN, %rN
4360            orb %rN, %rN   -> testb %rN, %rN
4361            orw %rN, %rN   -> testw %rN, %rN
4362            orq %rN, %rN   -> testq %rN, %rN
4363
4364            and outside of 64-bit mode
4365
4366            andl %rN, %rN  -> testl %rN, %rN
4367            orl %rN, %rN   -> testl %rN, %rN
4368        */
4369       i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4370     }
4371   else if (i.reg_operands == 3
4372            && i.op[0].regs == i.op[1].regs
4373            && !i.types[2].bitfield.xmmword
4374            && (i.tm.opcode_modifier.vex
4375                || ((!i.mask.reg || i.mask.zeroing)
4376                    && is_evex_encoding (&i.tm)
4377                    && (i.vec_encoding != vex_encoding_evex
4378                        || cpu_arch_isa_flags.bitfield.cpuavx512vl
4379                        || i.tm.cpu_flags.bitfield.cpuavx512vl
4380                        || (i.tm.operand_types[2].bitfield.zmmword
4381                            && i.types[2].bitfield.ymmword))))
4382            && i.tm.opcode_modifier.opcodespace == SPACE_0F
4383            && ((i.tm.base_opcode | 2) == 0x57
4384                || i.tm.base_opcode == 0xdf
4385                || i.tm.base_opcode == 0xef
4386                || (i.tm.base_opcode | 3) == 0xfb
4387                || i.tm.base_opcode == 0x42
4388                || i.tm.base_opcode == 0x47))
4389     {
4390       /* Optimize: -O1:
4391            VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4392            vpsubq and vpsubw:
4393              EVEX VOP %zmmM, %zmmM, %zmmN
4394                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4395                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4396              EVEX VOP %ymmM, %ymmM, %ymmN
4397                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4398                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4399              VEX VOP %ymmM, %ymmM, %ymmN
4400                -> VEX VOP %xmmM, %xmmM, %xmmN
4401            VOP, one of vpandn and vpxor:
4402              VEX VOP %ymmM, %ymmM, %ymmN
4403                -> VEX VOP %xmmM, %xmmM, %xmmN
4404            VOP, one of vpandnd and vpandnq:
4405              EVEX VOP %zmmM, %zmmM, %zmmN
4406                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4407                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4408              EVEX VOP %ymmM, %ymmM, %ymmN
4409                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4410                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4411            VOP, one of vpxord and vpxorq:
4412              EVEX VOP %zmmM, %zmmM, %zmmN
4413                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4414                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4415              EVEX VOP %ymmM, %ymmM, %ymmN
4416                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4417                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4418            VOP, one of kxord and kxorq:
4419              VEX VOP %kM, %kM, %kN
4420                -> VEX kxorw %kM, %kM, %kN
4421            VOP, one of kandnd and kandnq:
4422              VEX VOP %kM, %kM, %kN
4423                -> VEX kandnw %kM, %kM, %kN
4424        */
4425       if (is_evex_encoding (&i.tm))
4426         {
4427           if (i.vec_encoding != vex_encoding_evex)
4428             {
4429               i.tm.opcode_modifier.vex = VEX128;
4430               i.tm.opcode_modifier.vexw = VEXW0;
4431               i.tm.opcode_modifier.evex = 0;
4432             }
4433           else if (optimize > 1)
4434             i.tm.opcode_modifier.evex = EVEX128;
4435           else
4436             return;
4437         }
4438       else if (i.tm.operand_types[0].bitfield.class == RegMask)
4439         {
4440           i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4441           i.tm.opcode_modifier.vexw = VEXW0;
4442         }
4443       else
4444         i.tm.opcode_modifier.vex = VEX128;
4445
4446       if (i.tm.opcode_modifier.vex)
4447         for (j = 0; j < 3; j++)
4448           {
4449             i.types[j].bitfield.xmmword = 1;
4450             i.types[j].bitfield.ymmword = 0;
4451           }
4452     }
4453   else if (i.vec_encoding != vex_encoding_evex
4454            && !i.types[0].bitfield.zmmword
4455            && !i.types[1].bitfield.zmmword
4456            && !i.mask.reg
4457            && !i.broadcast.bytes
4458            && is_evex_encoding (&i.tm)
4459            && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4460                || (i.tm.base_opcode & ~4) == 0xdb
4461                || (i.tm.base_opcode & ~4) == 0xeb)
4462            && i.tm.extension_opcode == None)
4463     {
4464       /* Optimize: -O1:
4465            VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4466            vmovdqu32 and vmovdqu64:
4467              EVEX VOP %xmmM, %xmmN
4468                -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4469              EVEX VOP %ymmM, %ymmN
4470                -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4471              EVEX VOP %xmmM, mem
4472                -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4473              EVEX VOP %ymmM, mem
4474                -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4475              EVEX VOP mem, %xmmN
4476                -> VEX mvmovdqa|vmovdquem, %xmmN (N < 16)
4477              EVEX VOP mem, %ymmN
4478                -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4479            VOP, one of vpand, vpandn, vpor, vpxor:
4480              EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4481                -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4482              EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4483                -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4484              EVEX VOP{d,q} mem, %xmmM, %xmmN
4485                -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4486              EVEX VOP{d,q} mem, %ymmM, %ymmN
4487                -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4488        */
4489       for (j = 0; j < i.operands; j++)
4490         if (operand_type_check (i.types[j], disp)
4491             && i.op[j].disps->X_op == O_constant)
4492           {
4493             /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4494                has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4495                bytes, we choose EVEX Disp8 over VEX Disp32.  */
4496             int evex_disp8, vex_disp8;
4497             unsigned int memshift = i.memshift;
4498             offsetT n = i.op[j].disps->X_add_number;
4499
4500             evex_disp8 = fits_in_disp8 (n);
4501             i.memshift = 0;
4502             vex_disp8 = fits_in_disp8 (n);
4503             if (evex_disp8 != vex_disp8)
4504               {
4505                 i.memshift = memshift;
4506                 return;
4507               }
4508
4509             i.types[j].bitfield.disp8 = vex_disp8;
4510             break;
4511           }
4512       if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4513           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4514         i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4515       i.tm.opcode_modifier.vex
4516         = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4517       i.tm.opcode_modifier.vexw = VEXW0;
4518       /* VPAND, VPOR, and VPXOR are commutative.  */
4519       if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4520         i.tm.opcode_modifier.commutative = 1;
4521       i.tm.opcode_modifier.evex = 0;
4522       i.tm.opcode_modifier.masking = 0;
4523       i.tm.opcode_modifier.broadcast = 0;
4524       i.tm.opcode_modifier.disp8memshift = 0;
4525       i.memshift = 0;
4526       if (j < i.operands)
4527         i.types[j].bitfield.disp8
4528           = fits_in_disp8 (i.op[j].disps->X_add_number);
4529     }
4530 }
4531
4532 /* Return non-zero for load instruction.  */
4533
4534 static int
4535 load_insn_p (void)
4536 {
4537   unsigned int dest;
4538   int any_vex_p = is_any_vex_encoding (&i.tm);
4539   unsigned int base_opcode = i.tm.base_opcode | 1;
4540
4541   if (!any_vex_p)
4542     {
4543       /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
4544          bndcn, bndstx, bndldx, clflushopt, clwb, cldemote.  */
4545       if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
4546         return 0;
4547
4548       /* pop.   */
4549       if (strcmp (i.tm.name, "pop") == 0)
4550         return 1;
4551     }
4552
4553   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4554     {
4555       /* popf, popa.   */
4556       if (i.tm.base_opcode == 0x9d
4557           || i.tm.base_opcode == 0x61)
4558         return 1;
4559
4560       /* movs, cmps, lods, scas.  */
4561       if ((i.tm.base_opcode | 0xb) == 0xaf)
4562         return 1;
4563
4564       /* outs, xlatb.  */
4565       if (base_opcode == 0x6f
4566           || i.tm.base_opcode == 0xd7)
4567         return 1;
4568       /* NB: For AMD-specific insns with implicit memory operands,
4569          they're intentionally not covered.  */
4570     }
4571
4572   /* No memory operand.  */
4573   if (!i.mem_operands)
4574     return 0;
4575
4576   if (any_vex_p)
4577     {
4578       /* vldmxcsr.  */
4579       if (i.tm.base_opcode == 0xae
4580           && i.tm.opcode_modifier.vex
4581           && i.tm.opcode_modifier.opcodespace == SPACE_0F
4582           && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4583           && i.tm.extension_opcode == 2)
4584         return 1;
4585     }
4586   else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4587     {
4588       /* test, not, neg, mul, imul, div, idiv.  */
4589       if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
4590           && i.tm.extension_opcode != 1)
4591         return 1;
4592
4593       /* inc, dec.  */
4594       if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4595         return 1;
4596
4597       /* add, or, adc, sbb, and, sub, xor, cmp.  */
4598       if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4599         return 1;
4600
4601       /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4602       if ((base_opcode == 0xc1
4603            || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
4604           && i.tm.extension_opcode != 6)
4605         return 1;
4606
4607       /* Check for x87 instructions.  */
4608       if (base_opcode >= 0xd8 && base_opcode <= 0xdf)
4609         {
4610           /* Skip fst, fstp, fstenv, fstcw.  */
4611           if (i.tm.base_opcode == 0xd9
4612               && (i.tm.extension_opcode == 2
4613                   || i.tm.extension_opcode == 3
4614                   || i.tm.extension_opcode == 6
4615                   || i.tm.extension_opcode == 7))
4616             return 0;
4617
4618           /* Skip fisttp, fist, fistp, fstp.  */
4619           if (i.tm.base_opcode == 0xdb
4620               && (i.tm.extension_opcode == 1
4621                   || i.tm.extension_opcode == 2
4622                   || i.tm.extension_opcode == 3
4623                   || i.tm.extension_opcode == 7))
4624             return 0;
4625
4626           /* Skip fisttp, fst, fstp, fsave, fstsw.  */
4627           if (i.tm.base_opcode == 0xdd
4628               && (i.tm.extension_opcode == 1
4629                   || i.tm.extension_opcode == 2
4630                   || i.tm.extension_opcode == 3
4631                   || i.tm.extension_opcode == 6
4632                   || i.tm.extension_opcode == 7))
4633             return 0;
4634
4635           /* Skip fisttp, fist, fistp, fbstp, fistp.  */
4636           if (i.tm.base_opcode == 0xdf
4637               && (i.tm.extension_opcode == 1
4638                   || i.tm.extension_opcode == 2
4639                   || i.tm.extension_opcode == 3
4640                   || i.tm.extension_opcode == 6
4641                   || i.tm.extension_opcode == 7))
4642             return 0;
4643
4644           return 1;
4645         }
4646     }
4647   else if (i.tm.opcode_modifier.opcodespace == SPACE_0F)
4648     {
4649       /* bt, bts, btr, btc.  */
4650       if (i.tm.base_opcode == 0xba
4651           && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
4652         return 1;
4653
4654       /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld.  */
4655       if (i.tm.base_opcode == 0xc7
4656           && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4657           && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
4658               || i.tm.extension_opcode == 6))
4659         return 1;
4660
4661       /* fxrstor, ldmxcsr, xrstor.  */
4662       if (i.tm.base_opcode == 0xae
4663           && (i.tm.extension_opcode == 1
4664               || i.tm.extension_opcode == 2
4665               || i.tm.extension_opcode == 5))
4666         return 1;
4667
4668       /* lgdt, lidt, lmsw.  */
4669       if (i.tm.base_opcode == 0x01
4670           && (i.tm.extension_opcode == 2
4671               || i.tm.extension_opcode == 3
4672               || i.tm.extension_opcode == 6))
4673         return 1;
4674     }
4675
4676   dest = i.operands - 1;
4677
4678   /* Check fake imm8 operand and 3 source operands.  */
4679   if ((i.tm.opcode_modifier.immext
4680        || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
4681       && i.types[dest].bitfield.imm8)
4682     dest--;
4683
4684   /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg.  */
4685   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4686       && (base_opcode == 0x1
4687           || base_opcode == 0x9
4688           || base_opcode == 0x11
4689           || base_opcode == 0x19
4690           || base_opcode == 0x21
4691           || base_opcode == 0x29
4692           || base_opcode == 0x31
4693           || base_opcode == 0x39
4694           || (base_opcode | 2) == 0x87))
4695     return 1;
4696
4697   /* xadd.  */
4698   if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4699       && base_opcode == 0xc1)
4700     return 1;
4701
4702   /* Check for load instruction.  */
4703   return (i.types[dest].bitfield.class != ClassNone
4704           || i.types[dest].bitfield.instance == Accum);
4705 }
4706
4707 /* Output lfence, 0xfaee8, after instruction.  */
4708
4709 static void
4710 insert_lfence_after (void)
4711 {
4712   if (lfence_after_load && load_insn_p ())
4713     {
4714       /* There are also two REP string instructions that require
4715          special treatment. Specifically, the compare string (CMPS)
4716          and scan string (SCAS) instructions set EFLAGS in a manner
4717          that depends on the data being compared/scanned. When used
4718          with a REP prefix, the number of iterations may therefore
4719          vary depending on this data. If the data is a program secret
4720          chosen by the adversary using an LVI method,
4721          then this data-dependent behavior may leak some aspect
4722          of the secret.  */
4723       if (((i.tm.base_opcode | 0x1) == 0xa7
4724            || (i.tm.base_opcode | 0x1) == 0xaf)
4725           && i.prefix[REP_PREFIX])
4726         {
4727             as_warn (_("`%s` changes flags which would affect control flow behavior"),
4728                      i.tm.name);
4729         }
4730       char *p = frag_more (3);
4731       *p++ = 0xf;
4732       *p++ = 0xae;
4733       *p = 0xe8;
4734     }
4735 }
4736
4737 /* Output lfence, 0xfaee8, before instruction.  */
4738
4739 static void
4740 insert_lfence_before (void)
4741 {
4742   char *p;
4743
4744   if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
4745     return;
4746
4747   if (i.tm.base_opcode == 0xff
4748       && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4749     {
4750       /* Insert lfence before indirect branch if needed.  */
4751
4752       if (lfence_before_indirect_branch == lfence_branch_none)
4753         return;
4754
4755       if (i.operands != 1)
4756         abort ();
4757
4758       if (i.reg_operands == 1)
4759         {
4760           /* Indirect branch via register.  Don't insert lfence with
4761              -mlfence-after-load=yes.  */
4762           if (lfence_after_load
4763               || lfence_before_indirect_branch == lfence_branch_memory)
4764             return;
4765         }
4766       else if (i.mem_operands == 1
4767                && lfence_before_indirect_branch != lfence_branch_register)
4768         {
4769           as_warn (_("indirect `%s` with memory operand should be avoided"),
4770                    i.tm.name);
4771           return;
4772         }
4773       else
4774         return;
4775
4776       if (last_insn.kind != last_insn_other
4777           && last_insn.seg == now_seg)
4778         {
4779           as_warn_where (last_insn.file, last_insn.line,
4780                          _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4781                          last_insn.name, i.tm.name);
4782           return;
4783         }
4784
4785       p = frag_more (3);
4786       *p++ = 0xf;
4787       *p++ = 0xae;
4788       *p = 0xe8;
4789       return;
4790     }
4791
4792   /* Output or/not/shl and lfence before near ret.  */
4793   if (lfence_before_ret != lfence_before_ret_none
4794       && (i.tm.base_opcode == 0xc2
4795           || i.tm.base_opcode == 0xc3))
4796     {
4797       if (last_insn.kind != last_insn_other
4798           && last_insn.seg == now_seg)
4799         {
4800           as_warn_where (last_insn.file, last_insn.line,
4801                          _("`%s` skips -mlfence-before-ret on `%s`"),
4802                          last_insn.name, i.tm.name);
4803           return;
4804         }
4805
4806       /* Near ret ingore operand size override under CPU64.  */
4807       char prefix = flag_code == CODE_64BIT
4808                     ? 0x48
4809                     : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
4810
4811       if (lfence_before_ret == lfence_before_ret_not)
4812         {
4813           /* not: 0xf71424, may add prefix
4814              for operand size override or 64-bit code.  */
4815           p = frag_more ((prefix ? 2 : 0) + 6 + 3);
4816           if (prefix)
4817             *p++ = prefix;
4818           *p++ = 0xf7;
4819           *p++ = 0x14;
4820           *p++ = 0x24;
4821           if (prefix)
4822             *p++ = prefix;
4823           *p++ = 0xf7;
4824           *p++ = 0x14;
4825           *p++ = 0x24;
4826         }
4827       else
4828         {
4829           p = frag_more ((prefix ? 1 : 0) + 4 + 3);
4830           if (prefix)
4831             *p++ = prefix;
4832           if (lfence_before_ret == lfence_before_ret_or)
4833             {
4834               /* or: 0x830c2400, may add prefix
4835                  for operand size override or 64-bit code.  */
4836               *p++ = 0x83;
4837               *p++ = 0x0c;
4838             }
4839           else
4840             {
4841               /* shl: 0xc1242400, may add prefix
4842                  for operand size override or 64-bit code.  */
4843               *p++ = 0xc1;
4844               *p++ = 0x24;
4845             }
4846
4847           *p++ = 0x24;
4848           *p++ = 0x0;
4849         }
4850
4851       *p++ = 0xf;
4852       *p++ = 0xae;
4853       *p = 0xe8;
4854     }
4855 }
4856
4857 /* This is the guts of the machine-dependent assembler.  LINE points to a
4858    machine dependent instruction.  This function is supposed to emit
4859    the frags/bytes it assembles to.  */
4860
4861 void
4862 md_assemble (char *line)
4863 {
4864   unsigned int j;
4865   char mnemonic[MAX_MNEM_SIZE], mnem_suffix;
4866   const insn_template *t;
4867
4868   /* Initialize globals.  */
4869   memset (&i, '\0', sizeof (i));
4870   i.rounding.type = rc_none;
4871   for (j = 0; j < MAX_OPERANDS; j++)
4872     i.reloc[j] = NO_RELOC;
4873   memset (disp_expressions, '\0', sizeof (disp_expressions));
4874   memset (im_expressions, '\0', sizeof (im_expressions));
4875   save_stack_p = save_stack;
4876
4877   /* First parse an instruction mnemonic & call i386_operand for the operands.
4878      We assume that the scrubber has arranged it so that line[0] is the valid
4879      start of a (possibly prefixed) mnemonic.  */
4880
4881   line = parse_insn (line, mnemonic);
4882   if (line == NULL)
4883     return;
4884   mnem_suffix = i.suffix;
4885
4886   line = parse_operands (line, mnemonic);
4887   this_operand = -1;
4888   xfree (i.memop1_string);
4889   i.memop1_string = NULL;
4890   if (line == NULL)
4891     return;
4892
4893   /* Now we've parsed the mnemonic into a set of templates, and have the
4894      operands at hand.  */
4895
4896   /* All Intel opcodes have reversed operands except for "bound", "enter",
4897      "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
4898      "rmpadjust", "rmpupdate", and "rmpquery".  We also don't reverse
4899      intersegment "jmp" and "call" instructions with 2 immediate operands so
4900      that the immediate segment precedes the offset consistently in Intel and
4901      AT&T modes.  */
4902   if (intel_syntax
4903       && i.operands > 1
4904       && (strcmp (mnemonic, "bound") != 0)
4905       && (strncmp (mnemonic, "invlpg", 6) != 0)
4906       && !startswith (mnemonic, "monitor")
4907       && !startswith (mnemonic, "mwait")
4908       && (strcmp (mnemonic, "pvalidate") != 0)
4909       && !startswith (mnemonic, "rmp")
4910       && (strcmp (mnemonic, "tpause") != 0)
4911       && (strcmp (mnemonic, "umwait") != 0)
4912       && !(i.operands == 2
4913            && operand_type_check (i.types[0], imm)
4914            && operand_type_check (i.types[1], imm)))
4915     swap_operands ();
4916
4917   /* The order of the immediates should be reversed
4918      for 2 immediates extrq and insertq instructions */
4919   if (i.imm_operands == 2
4920       && (strcmp (mnemonic, "extrq") == 0
4921           || strcmp (mnemonic, "insertq") == 0))
4922       swap_2_operands (0, 1);
4923
4924   if (i.imm_operands)
4925     optimize_imm ();
4926
4927   if (i.disp_operands && !want_disp32 (current_templates->start)
4928       && (!current_templates->start->opcode_modifier.jump
4929           || i.jumpabsolute || i.types[0].bitfield.baseindex))
4930     {
4931       for (j = 0; j < i.operands; ++j)
4932         {
4933           const expressionS *exp = i.op[j].disps;
4934
4935           if (!operand_type_check (i.types[j], disp))
4936             continue;
4937
4938           if (exp->X_op != O_constant)
4939             continue;
4940
4941           /* Since displacement is signed extended to 64bit, don't allow
4942              disp32 if it is out of range.  */
4943           if (fits_in_signed_long (exp->X_add_number))
4944             continue;
4945
4946           i.types[j].bitfield.disp32 = 0;
4947           if (i.types[j].bitfield.baseindex)
4948             {
4949               as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
4950                       (uint64_t) exp->X_add_number);
4951               return;
4952             }
4953         }
4954     }
4955
4956   /* Don't optimize displacement for movabs since it only takes 64bit
4957      displacement.  */
4958   if (i.disp_operands
4959       && i.disp_encoding <= disp_encoding_8bit
4960       && (flag_code != CODE_64BIT
4961           || strcmp (mnemonic, "movabs") != 0))
4962     optimize_disp ();
4963
4964   /* Next, we find a template that matches the given insn,
4965      making sure the overlap of the given operands types is consistent
4966      with the template operand types.  */
4967
4968   if (!(t = match_template (mnem_suffix)))
4969     return;
4970
4971   if (sse_check != check_none
4972       /* The opcode space check isn't strictly needed; it's there only to
4973          bypass the logic below when easily possible.  */
4974       && t->opcode_modifier.opcodespace >= SPACE_0F
4975       && t->opcode_modifier.opcodespace <= SPACE_0F3A
4976       && !i.tm.cpu_flags.bitfield.cpusse4a
4977       && !is_any_vex_encoding (t))
4978     {
4979       bool simd = false;
4980
4981       for (j = 0; j < t->operands; ++j)
4982         {
4983           if (t->operand_types[j].bitfield.class == RegMMX)
4984             break;
4985           if (t->operand_types[j].bitfield.class == RegSIMD)
4986             simd = true;
4987         }
4988
4989       if (j >= t->operands && simd)
4990         (sse_check == check_warning
4991          ? as_warn
4992          : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
4993     }
4994
4995   if (i.tm.opcode_modifier.fwait)
4996     if (!add_prefix (FWAIT_OPCODE))
4997       return;
4998
4999   /* Check if REP prefix is OK.  */
5000   if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
5001     {
5002       as_bad (_("invalid instruction `%s' after `%s'"),
5003                 i.tm.name, i.rep_prefix);
5004       return;
5005     }
5006
5007   /* Check for lock without a lockable instruction.  Destination operand
5008      must be memory unless it is xchg (0x86).  */
5009   if (i.prefix[LOCK_PREFIX]
5010       && (i.tm.opcode_modifier.prefixok < PrefixLock
5011           || i.mem_operands == 0
5012           || (i.tm.base_opcode != 0x86
5013               && !(i.flags[i.operands - 1] & Operand_Mem))))
5014     {
5015       as_bad (_("expecting lockable instruction after `lock'"));
5016       return;
5017     }
5018
5019   /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns.  */
5020   if (i.prefix[DATA_PREFIX]
5021       && (is_any_vex_encoding (&i.tm)
5022           || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
5023           || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX))
5024     {
5025       as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
5026       return;
5027     }
5028
5029   /* Check if HLE prefix is OK.  */
5030   if (i.hle_prefix && !check_hle ())
5031     return;
5032
5033   /* Check BND prefix.  */
5034   if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
5035     as_bad (_("expecting valid branch instruction after `bnd'"));
5036
5037   /* Check NOTRACK prefix.  */
5038   if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
5039     as_bad (_("expecting indirect branch instruction after `notrack'"));
5040
5041   if (i.tm.cpu_flags.bitfield.cpumpx)
5042     {
5043       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
5044         as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
5045       else if (flag_code != CODE_16BIT
5046                ? i.prefix[ADDR_PREFIX]
5047                : i.mem_operands && !i.prefix[ADDR_PREFIX])
5048         as_bad (_("16-bit address isn't allowed in MPX instructions"));
5049     }
5050
5051   /* Insert BND prefix.  */
5052   if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
5053     {
5054       if (!i.prefix[BND_PREFIX])
5055         add_prefix (BND_PREFIX_OPCODE);
5056       else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
5057         {
5058           as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
5059           i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
5060         }
5061     }
5062
5063   /* Check string instruction segment overrides.  */
5064   if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
5065     {
5066       gas_assert (i.mem_operands);
5067       if (!check_string ())
5068         return;
5069       i.disp_operands = 0;
5070     }
5071
5072   /* The memory operand of (%dx) should be only used with input/output
5073      instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee).  */
5074   if (i.input_output_operand
5075       && ((i.tm.base_opcode | 0x82) != 0xee
5076           || i.tm.opcode_modifier.opcodespace != SPACE_BASE))
5077     {
5078       as_bad (_("input/output port address isn't allowed with `%s'"),
5079               i.tm.name);
5080       return;
5081     }
5082
5083   if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
5084     optimize_encoding ();
5085
5086   if (use_unaligned_vector_move)
5087     encode_with_unaligned_vector_move ();
5088
5089   if (!process_suffix ())
5090     return;
5091
5092   /* Check if IP-relative addressing requirements can be satisfied.  */
5093   if (i.tm.cpu_flags.bitfield.cpuprefetchi
5094       && !(i.base_reg && i.base_reg->reg_num == RegIP))
5095     as_warn (_("'%s' only supports RIP-relative address"), i.tm.name);
5096
5097   /* Update operand types and check extended states.  */
5098   for (j = 0; j < i.operands; j++)
5099     {
5100       i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
5101       switch (i.tm.operand_types[j].bitfield.class)
5102         {
5103         default:
5104           break;
5105         case RegMMX:
5106           i.xstate |= xstate_mmx;
5107           break;
5108         case RegMask:
5109           i.xstate |= xstate_mask;
5110           break;
5111         case RegSIMD:
5112           if (i.tm.operand_types[j].bitfield.tmmword)
5113             i.xstate |= xstate_tmm;
5114           else if (i.tm.operand_types[j].bitfield.zmmword)
5115             i.xstate |= xstate_zmm;
5116           else if (i.tm.operand_types[j].bitfield.ymmword)
5117             i.xstate |= xstate_ymm;
5118           else if (i.tm.operand_types[j].bitfield.xmmword)
5119             i.xstate |= xstate_xmm;
5120           break;
5121         }
5122     }
5123
5124   /* Make still unresolved immediate matches conform to size of immediate
5125      given in i.suffix.  */
5126   if (!finalize_imm ())
5127     return;
5128
5129   if (i.types[0].bitfield.imm1)
5130     i.imm_operands = 0; /* kludge for shift insns.  */
5131
5132   /* We only need to check those implicit registers for instructions
5133      with 3 operands or less.  */
5134   if (i.operands <= 3)
5135     for (j = 0; j < i.operands; j++)
5136       if (i.types[j].bitfield.instance != InstanceNone
5137           && !i.types[j].bitfield.xmmword)
5138         i.reg_operands--;
5139
5140   /* For insns with operands there are more diddles to do to the opcode.  */
5141   if (i.operands)
5142     {
5143       if (!process_operands ())
5144         return;
5145     }
5146   else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
5147     {
5148       /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc.  */
5149       as_warn (_("translating to `%sp'"), i.tm.name);
5150     }
5151
5152   if (is_any_vex_encoding (&i.tm))
5153     {
5154       if (!cpu_arch_flags.bitfield.cpui286)
5155         {
5156           as_bad (_("instruction `%s' isn't supported outside of protected mode."),
5157                   i.tm.name);
5158           return;
5159         }
5160
5161       /* Check for explicit REX prefix.  */
5162       if (i.prefix[REX_PREFIX] || i.rex_encoding)
5163         {
5164           as_bad (_("REX prefix invalid with `%s'"), i.tm.name);
5165           return;
5166         }
5167
5168       if (i.tm.opcode_modifier.vex)
5169         build_vex_prefix (t);
5170       else
5171         build_evex_prefix ();
5172
5173       /* The individual REX.RXBW bits got consumed.  */
5174       i.rex &= REX_OPCODE;
5175     }
5176
5177   /* Handle conversion of 'int $3' --> special int3 insn.  XOP or FMA4
5178      instructions may define INT_OPCODE as well, so avoid this corner
5179      case for those instructions that use MODRM.  */
5180   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
5181       && i.tm.base_opcode == INT_OPCODE
5182       && !i.tm.opcode_modifier.modrm
5183       && i.op[0].imms->X_add_number == 3)
5184     {
5185       i.tm.base_opcode = INT3_OPCODE;
5186       i.imm_operands = 0;
5187     }
5188
5189   if ((i.tm.opcode_modifier.jump == JUMP
5190        || i.tm.opcode_modifier.jump == JUMP_BYTE
5191        || i.tm.opcode_modifier.jump == JUMP_DWORD)
5192       && i.op[0].disps->X_op == O_constant)
5193     {
5194       /* Convert "jmp constant" (and "call constant") to a jump (call) to
5195          the absolute address given by the constant.  Since ix86 jumps and
5196          calls are pc relative, we need to generate a reloc.  */
5197       i.op[0].disps->X_add_symbol = &abs_symbol;
5198       i.op[0].disps->X_op = O_symbol;
5199     }
5200
5201   /* For 8 bit registers we need an empty rex prefix.  Also if the
5202      instruction already has a prefix, we need to convert old
5203      registers to new ones.  */
5204
5205   if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
5206        && (i.op[0].regs->reg_flags & RegRex64) != 0)
5207       || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
5208           && (i.op[1].regs->reg_flags & RegRex64) != 0)
5209       || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
5210            || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
5211           && i.rex != 0))
5212     {
5213       int x;
5214
5215       i.rex |= REX_OPCODE;
5216       for (x = 0; x < 2; x++)
5217         {
5218           /* Look for 8 bit operand that uses old registers.  */
5219           if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
5220               && (i.op[x].regs->reg_flags & RegRex64) == 0)
5221             {
5222               gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5223               /* In case it is "hi" register, give up.  */
5224               if (i.op[x].regs->reg_num > 3)
5225                 as_bad (_("can't encode register '%s%s' in an "
5226                           "instruction requiring REX prefix."),
5227                         register_prefix, i.op[x].regs->reg_name);
5228
5229               /* Otherwise it is equivalent to the extended register.
5230                  Since the encoding doesn't change this is merely
5231                  cosmetic cleanup for debug output.  */
5232
5233               i.op[x].regs = i.op[x].regs + 8;
5234             }
5235         }
5236     }
5237
5238   if (i.rex == 0 && i.rex_encoding)
5239     {
5240       /* Check if we can add a REX_OPCODE byte.  Look for 8 bit operand
5241          that uses legacy register.  If it is "hi" register, don't add
5242          the REX_OPCODE byte.  */
5243       int x;
5244       for (x = 0; x < 2; x++)
5245         if (i.types[x].bitfield.class == Reg
5246             && i.types[x].bitfield.byte
5247             && (i.op[x].regs->reg_flags & RegRex64) == 0
5248             && i.op[x].regs->reg_num > 3)
5249           {
5250             gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5251             i.rex_encoding = false;
5252             break;
5253           }
5254
5255       if (i.rex_encoding)
5256         i.rex = REX_OPCODE;
5257     }
5258
5259   if (i.rex != 0)
5260     add_prefix (REX_OPCODE | i.rex);
5261
5262   insert_lfence_before ();
5263
5264   /* We are ready to output the insn.  */
5265   output_insn ();
5266
5267   insert_lfence_after ();
5268
5269   last_insn.seg = now_seg;
5270
5271   if (i.tm.opcode_modifier.isprefix)
5272     {
5273       last_insn.kind = last_insn_prefix;
5274       last_insn.name = i.tm.name;
5275       last_insn.file = as_where (&last_insn.line);
5276     }
5277   else
5278     last_insn.kind = last_insn_other;
5279 }
5280
5281 static char *
5282 parse_insn (char *line, char *mnemonic)
5283 {
5284   char *l = line;
5285   char *token_start = l;
5286   char *mnem_p;
5287   int supported;
5288   const insn_template *t;
5289   char *dot_p = NULL;
5290
5291   while (1)
5292     {
5293       mnem_p = mnemonic;
5294       while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5295         {
5296           if (*mnem_p == '.')
5297             dot_p = mnem_p;
5298           mnem_p++;
5299           if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5300             {
5301               as_bad (_("no such instruction: `%s'"), token_start);
5302               return NULL;
5303             }
5304           l++;
5305         }
5306       if (!is_space_char (*l)
5307           && *l != END_OF_INSN
5308           && (intel_syntax
5309               || (*l != PREFIX_SEPARATOR
5310                   && *l != ',')))
5311         {
5312           as_bad (_("invalid character %s in mnemonic"),
5313                   output_invalid (*l));
5314           return NULL;
5315         }
5316       if (token_start == l)
5317         {
5318           if (!intel_syntax && *l == PREFIX_SEPARATOR)
5319             as_bad (_("expecting prefix; got nothing"));
5320           else
5321             as_bad (_("expecting mnemonic; got nothing"));
5322           return NULL;
5323         }
5324
5325       /* Look up instruction (or prefix) via hash table.  */
5326       current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5327
5328       if (*l != END_OF_INSN
5329           && (!is_space_char (*l) || l[1] != END_OF_INSN)
5330           && current_templates
5331           && current_templates->start->opcode_modifier.isprefix)
5332         {
5333           if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
5334             {
5335               as_bad ((flag_code != CODE_64BIT
5336                        ? _("`%s' is only supported in 64-bit mode")
5337                        : _("`%s' is not supported in 64-bit mode")),
5338                       current_templates->start->name);
5339               return NULL;
5340             }
5341           /* If we are in 16-bit mode, do not allow addr16 or data16.
5342              Similarly, in 32-bit mode, do not allow addr32 or data32.  */
5343           if ((current_templates->start->opcode_modifier.size == SIZE16
5344                || current_templates->start->opcode_modifier.size == SIZE32)
5345               && flag_code != CODE_64BIT
5346               && ((current_templates->start->opcode_modifier.size == SIZE32)
5347                   ^ (flag_code == CODE_16BIT)))
5348             {
5349               as_bad (_("redundant %s prefix"),
5350                       current_templates->start->name);
5351               return NULL;
5352             }
5353
5354           if (current_templates->start->base_opcode == PSEUDO_PREFIX)
5355             {
5356               /* Handle pseudo prefixes.  */
5357               switch (current_templates->start->extension_opcode)
5358                 {
5359                 case Prefix_Disp8:
5360                   /* {disp8} */
5361                   i.disp_encoding = disp_encoding_8bit;
5362                   break;
5363                 case Prefix_Disp16:
5364                   /* {disp16} */
5365                   i.disp_encoding = disp_encoding_16bit;
5366                   break;
5367                 case Prefix_Disp32:
5368                   /* {disp32} */
5369                   i.disp_encoding = disp_encoding_32bit;
5370                   break;
5371                 case Prefix_Load:
5372                   /* {load} */
5373                   i.dir_encoding = dir_encoding_load;
5374                   break;
5375                 case Prefix_Store:
5376                   /* {store} */
5377                   i.dir_encoding = dir_encoding_store;
5378                   break;
5379                 case Prefix_VEX:
5380                   /* {vex} */
5381                   i.vec_encoding = vex_encoding_vex;
5382                   break;
5383                 case Prefix_VEX3:
5384                   /* {vex3} */
5385                   i.vec_encoding = vex_encoding_vex3;
5386                   break;
5387                 case Prefix_EVEX:
5388                   /* {evex} */
5389                   i.vec_encoding = vex_encoding_evex;
5390                   break;
5391                 case Prefix_REX:
5392                   /* {rex} */
5393                   i.rex_encoding = true;
5394                   break;
5395                 case Prefix_NoOptimize:
5396                   /* {nooptimize} */
5397                   i.no_optimize = true;
5398                   break;
5399                 default:
5400                   abort ();
5401                 }
5402             }
5403           else
5404             {
5405               /* Add prefix, checking for repeated prefixes.  */
5406               switch (add_prefix (current_templates->start->base_opcode))
5407                 {
5408                 case PREFIX_EXIST:
5409                   return NULL;
5410                 case PREFIX_DS:
5411                   if (current_templates->start->cpu_flags.bitfield.cpuibt)
5412                     i.notrack_prefix = current_templates->start->name;
5413                   break;
5414                 case PREFIX_REP:
5415                   if (current_templates->start->cpu_flags.bitfield.cpuhle)
5416                     i.hle_prefix = current_templates->start->name;
5417                   else if (current_templates->start->cpu_flags.bitfield.cpumpx)
5418                     i.bnd_prefix = current_templates->start->name;
5419                   else
5420                     i.rep_prefix = current_templates->start->name;
5421                   break;
5422                 default:
5423                   break;
5424                 }
5425             }
5426           /* Skip past PREFIX_SEPARATOR and reset token_start.  */
5427           token_start = ++l;
5428         }
5429       else
5430         break;
5431     }
5432
5433   if (!current_templates)
5434     {
5435       /* Deprecated functionality (new code should use pseudo-prefixes instead):
5436          Check if we should swap operand or force 32bit displacement in
5437          encoding.  */
5438       if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5439         i.dir_encoding = dir_encoding_swap;
5440       else if (mnem_p - 3 == dot_p
5441                && dot_p[1] == 'd'
5442                && dot_p[2] == '8')
5443         i.disp_encoding = disp_encoding_8bit;
5444       else if (mnem_p - 4 == dot_p
5445                && dot_p[1] == 'd'
5446                && dot_p[2] == '3'
5447                && dot_p[3] == '2')
5448         i.disp_encoding = disp_encoding_32bit;
5449       else
5450         goto check_suffix;
5451       mnem_p = dot_p;
5452       *dot_p = '\0';
5453       current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5454     }
5455
5456   if (!current_templates)
5457     {
5458     check_suffix:
5459       if (mnem_p > mnemonic)
5460         {
5461           /* See if we can get a match by trimming off a suffix.  */
5462           switch (mnem_p[-1])
5463             {
5464             case WORD_MNEM_SUFFIX:
5465               if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5466                 i.suffix = SHORT_MNEM_SUFFIX;
5467               else
5468                 /* Fall through.  */
5469               case BYTE_MNEM_SUFFIX:
5470               case QWORD_MNEM_SUFFIX:
5471                 i.suffix = mnem_p[-1];
5472               mnem_p[-1] = '\0';
5473               current_templates
5474                 = (const templates *) str_hash_find (op_hash, mnemonic);
5475               break;
5476             case SHORT_MNEM_SUFFIX:
5477             case LONG_MNEM_SUFFIX:
5478               if (!intel_syntax)
5479                 {
5480                   i.suffix = mnem_p[-1];
5481                   mnem_p[-1] = '\0';
5482                   current_templates
5483                     = (const templates *) str_hash_find (op_hash, mnemonic);
5484                 }
5485               break;
5486
5487               /* Intel Syntax.  */
5488             case 'd':
5489               if (intel_syntax)
5490                 {
5491                   if (intel_float_operand (mnemonic) == 1)
5492                     i.suffix = SHORT_MNEM_SUFFIX;
5493                   else
5494                     i.suffix = LONG_MNEM_SUFFIX;
5495                   mnem_p[-1] = '\0';
5496                   current_templates
5497                     = (const templates *) str_hash_find (op_hash, mnemonic);
5498                 }
5499               break;
5500             }
5501         }
5502
5503       if (!current_templates)
5504         {
5505           as_bad (_("no such instruction: `%s'"), token_start);
5506           return NULL;
5507         }
5508     }
5509
5510   if (current_templates->start->opcode_modifier.jump == JUMP
5511       || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5512     {
5513       /* Check for a branch hint.  We allow ",pt" and ",pn" for
5514          predict taken and predict not taken respectively.
5515          I'm not sure that branch hints actually do anything on loop
5516          and jcxz insns (JumpByte) for current Pentium4 chips.  They
5517          may work in the future and it doesn't hurt to accept them
5518          now.  */
5519       if (l[0] == ',' && l[1] == 'p')
5520         {
5521           if (l[2] == 't')
5522             {
5523               if (!add_prefix (DS_PREFIX_OPCODE))
5524                 return NULL;
5525               l += 3;
5526             }
5527           else if (l[2] == 'n')
5528             {
5529               if (!add_prefix (CS_PREFIX_OPCODE))
5530                 return NULL;
5531               l += 3;
5532             }
5533         }
5534     }
5535   /* Any other comma loses.  */
5536   if (*l == ',')
5537     {
5538       as_bad (_("invalid character %s in mnemonic"),
5539               output_invalid (*l));
5540       return NULL;
5541     }
5542
5543   /* Check if instruction is supported on specified architecture.  */
5544   supported = 0;
5545   for (t = current_templates->start; t < current_templates->end; ++t)
5546     {
5547       supported |= cpu_flags_match (t);
5548       if (supported == CPU_FLAGS_PERFECT_MATCH)
5549         return l;
5550     }
5551
5552   if (!(supported & CPU_FLAGS_64BIT_MATCH))
5553     as_bad (flag_code == CODE_64BIT
5554             ? _("`%s' is not supported in 64-bit mode")
5555             : _("`%s' is only supported in 64-bit mode"),
5556             current_templates->start->name);
5557   else
5558     as_bad (_("`%s' is not supported on `%s%s'"),
5559             current_templates->start->name,
5560             cpu_arch_name ? cpu_arch_name : default_arch,
5561             cpu_sub_arch_name ? cpu_sub_arch_name : "");
5562
5563   return NULL;
5564 }
5565
5566 static char *
5567 parse_operands (char *l, const char *mnemonic)
5568 {
5569   char *token_start;
5570
5571   /* 1 if operand is pending after ','.  */
5572   unsigned int expecting_operand = 0;
5573
5574   while (*l != END_OF_INSN)
5575     {
5576       /* Non-zero if operand parens not balanced.  */
5577       unsigned int paren_not_balanced = 0;
5578       /* True if inside double quotes.  */
5579       bool in_quotes = false;
5580
5581       /* Skip optional white space before operand.  */
5582       if (is_space_char (*l))
5583         ++l;
5584       if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5585         {
5586           as_bad (_("invalid character %s before operand %d"),
5587                   output_invalid (*l),
5588                   i.operands + 1);
5589           return NULL;
5590         }
5591       token_start = l;  /* After white space.  */
5592       while (in_quotes || paren_not_balanced || *l != ',')
5593         {
5594           if (*l == END_OF_INSN)
5595             {
5596               if (in_quotes)
5597                 {
5598                   as_bad (_("unbalanced double quotes in operand %d."),
5599                           i.operands + 1);
5600                   return NULL;
5601                 }
5602               if (paren_not_balanced)
5603                 {
5604                   know (!intel_syntax);
5605                   as_bad (_("unbalanced parenthesis in operand %d."),
5606                           i.operands + 1);
5607                   return NULL;
5608                 }
5609               else
5610                 break;  /* we are done */
5611             }
5612           else if (*l == '\\' && l[1] == '"')
5613             ++l;
5614           else if (*l == '"')
5615             in_quotes = !in_quotes;
5616           else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
5617             {
5618               as_bad (_("invalid character %s in operand %d"),
5619                       output_invalid (*l),
5620                       i.operands + 1);
5621               return NULL;
5622             }
5623           if (!intel_syntax && !in_quotes)
5624             {
5625               if (*l == '(')
5626                 ++paren_not_balanced;
5627               if (*l == ')')
5628                 --paren_not_balanced;
5629             }
5630           l++;
5631         }
5632       if (l != token_start)
5633         {                       /* Yes, we've read in another operand.  */
5634           unsigned int operand_ok;
5635           this_operand = i.operands++;
5636           if (i.operands > MAX_OPERANDS)
5637             {
5638               as_bad (_("spurious operands; (%d operands/instruction max)"),
5639                       MAX_OPERANDS);
5640               return NULL;
5641             }
5642           i.types[this_operand].bitfield.unspecified = 1;
5643           /* Now parse operand adding info to 'i' as we go along.  */
5644           END_STRING_AND_SAVE (l);
5645
5646           if (i.mem_operands > 1)
5647             {
5648               as_bad (_("too many memory references for `%s'"),
5649                       mnemonic);
5650               return 0;
5651             }
5652
5653           if (intel_syntax)
5654             operand_ok =
5655               i386_intel_operand (token_start,
5656                                   intel_float_operand (mnemonic));
5657           else
5658             operand_ok = i386_att_operand (token_start);
5659
5660           RESTORE_END_STRING (l);
5661           if (!operand_ok)
5662             return NULL;
5663         }
5664       else
5665         {
5666           if (expecting_operand)
5667             {
5668             expecting_operand_after_comma:
5669               as_bad (_("expecting operand after ','; got nothing"));
5670               return NULL;
5671             }
5672           if (*l == ',')
5673             {
5674               as_bad (_("expecting operand before ','; got nothing"));
5675               return NULL;
5676             }
5677         }
5678
5679       /* Now *l must be either ',' or END_OF_INSN.  */
5680       if (*l == ',')
5681         {
5682           if (*++l == END_OF_INSN)
5683             {
5684               /* Just skip it, if it's \n complain.  */
5685               goto expecting_operand_after_comma;
5686             }
5687           expecting_operand = 1;
5688         }
5689     }
5690   return l;
5691 }
5692
5693 static void
5694 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
5695 {
5696   union i386_op temp_op;
5697   i386_operand_type temp_type;
5698   unsigned int temp_flags;
5699   enum bfd_reloc_code_real temp_reloc;
5700
5701   temp_type = i.types[xchg2];
5702   i.types[xchg2] = i.types[xchg1];
5703   i.types[xchg1] = temp_type;
5704
5705   temp_flags = i.flags[xchg2];
5706   i.flags[xchg2] = i.flags[xchg1];
5707   i.flags[xchg1] = temp_flags;
5708
5709   temp_op = i.op[xchg2];
5710   i.op[xchg2] = i.op[xchg1];
5711   i.op[xchg1] = temp_op;
5712
5713   temp_reloc = i.reloc[xchg2];
5714   i.reloc[xchg2] = i.reloc[xchg1];
5715   i.reloc[xchg1] = temp_reloc;
5716
5717   if (i.mask.reg)
5718     {
5719       if (i.mask.operand == xchg1)
5720         i.mask.operand = xchg2;
5721       else if (i.mask.operand == xchg2)
5722         i.mask.operand = xchg1;
5723     }
5724   if (i.broadcast.type || i.broadcast.bytes)
5725     {
5726       if (i.broadcast.operand == xchg1)
5727         i.broadcast.operand = xchg2;
5728       else if (i.broadcast.operand == xchg2)
5729         i.broadcast.operand = xchg1;
5730     }
5731 }
5732
5733 static void
5734 swap_operands (void)
5735 {
5736   switch (i.operands)
5737     {
5738     case 5:
5739     case 4:
5740       swap_2_operands (1, i.operands - 2);
5741       /* Fall through.  */
5742     case 3:
5743     case 2:
5744       swap_2_operands (0, i.operands - 1);
5745       break;
5746     default:
5747       abort ();
5748     }
5749
5750   if (i.mem_operands == 2)
5751     {
5752       const reg_entry *temp_seg;
5753       temp_seg = i.seg[0];
5754       i.seg[0] = i.seg[1];
5755       i.seg[1] = temp_seg;
5756     }
5757 }
5758
5759 /* Try to ensure constant immediates are represented in the smallest
5760    opcode possible.  */
5761 static void
5762 optimize_imm (void)
5763 {
5764   char guess_suffix = 0;
5765   int op;
5766
5767   if (i.suffix)
5768     guess_suffix = i.suffix;
5769   else if (i.reg_operands)
5770     {
5771       /* Figure out a suffix from the last register operand specified.
5772          We can't do this properly yet, i.e. excluding special register
5773          instances, but the following works for instructions with
5774          immediates.  In any case, we can't set i.suffix yet.  */
5775       for (op = i.operands; --op >= 0;)
5776         if (i.types[op].bitfield.class != Reg)
5777           continue;
5778         else if (i.types[op].bitfield.byte)
5779           {
5780             guess_suffix = BYTE_MNEM_SUFFIX;
5781             break;
5782           }
5783         else if (i.types[op].bitfield.word)
5784           {
5785             guess_suffix = WORD_MNEM_SUFFIX;
5786             break;
5787           }
5788         else if (i.types[op].bitfield.dword)
5789           {
5790             guess_suffix = LONG_MNEM_SUFFIX;
5791             break;
5792           }
5793         else if (i.types[op].bitfield.qword)
5794           {
5795             guess_suffix = QWORD_MNEM_SUFFIX;
5796             break;
5797           }
5798     }
5799   else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
5800     guess_suffix = WORD_MNEM_SUFFIX;
5801
5802   for (op = i.operands; --op >= 0;)
5803     if (operand_type_check (i.types[op], imm))
5804       {
5805         switch (i.op[op].imms->X_op)
5806           {
5807           case O_constant:
5808             /* If a suffix is given, this operand may be shortened.  */
5809             switch (guess_suffix)
5810               {
5811               case LONG_MNEM_SUFFIX:
5812                 i.types[op].bitfield.imm32 = 1;
5813                 i.types[op].bitfield.imm64 = 1;
5814                 break;
5815               case WORD_MNEM_SUFFIX:
5816                 i.types[op].bitfield.imm16 = 1;
5817                 i.types[op].bitfield.imm32 = 1;
5818                 i.types[op].bitfield.imm32s = 1;
5819                 i.types[op].bitfield.imm64 = 1;
5820                 break;
5821               case BYTE_MNEM_SUFFIX:
5822                 i.types[op].bitfield.imm8 = 1;
5823                 i.types[op].bitfield.imm8s = 1;
5824                 i.types[op].bitfield.imm16 = 1;
5825                 i.types[op].bitfield.imm32 = 1;
5826                 i.types[op].bitfield.imm32s = 1;
5827                 i.types[op].bitfield.imm64 = 1;
5828                 break;
5829               }
5830
5831             /* If this operand is at most 16 bits, convert it
5832                to a signed 16 bit number before trying to see
5833                whether it will fit in an even smaller size.
5834                This allows a 16-bit operand such as $0xffe0 to
5835                be recognised as within Imm8S range.  */
5836             if ((i.types[op].bitfield.imm16)
5837                 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
5838               {
5839                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5840                                                 ^ 0x8000) - 0x8000);
5841               }
5842 #ifdef BFD64
5843             /* Store 32-bit immediate in 64-bit for 64-bit BFD.  */
5844             if ((i.types[op].bitfield.imm32)
5845                 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
5846               {
5847                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5848                                                 ^ ((offsetT) 1 << 31))
5849                                                - ((offsetT) 1 << 31));
5850               }
5851 #endif
5852             i.types[op]
5853               = operand_type_or (i.types[op],
5854                                  smallest_imm_type (i.op[op].imms->X_add_number));
5855
5856             /* We must avoid matching of Imm32 templates when 64bit
5857                only immediate is available.  */
5858             if (guess_suffix == QWORD_MNEM_SUFFIX)
5859               i.types[op].bitfield.imm32 = 0;
5860             break;
5861
5862           case O_absent:
5863           case O_register:
5864             abort ();
5865
5866             /* Symbols and expressions.  */
5867           default:
5868             /* Convert symbolic operand to proper sizes for matching, but don't
5869                prevent matching a set of insns that only supports sizes other
5870                than those matching the insn suffix.  */
5871             {
5872               i386_operand_type mask, allowed;
5873               const insn_template *t = current_templates->start;
5874
5875               operand_type_set (&mask, 0);
5876               switch (guess_suffix)
5877                 {
5878                 case QWORD_MNEM_SUFFIX:
5879                   mask.bitfield.imm64 = 1;
5880                   mask.bitfield.imm32s = 1;
5881                   break;
5882                 case LONG_MNEM_SUFFIX:
5883                   mask.bitfield.imm32 = 1;
5884                   break;
5885                 case WORD_MNEM_SUFFIX:
5886                   mask.bitfield.imm16 = 1;
5887                   break;
5888                 case BYTE_MNEM_SUFFIX:
5889                   mask.bitfield.imm8 = 1;
5890                   break;
5891                 default:
5892                   break;
5893                 }
5894
5895               allowed = operand_type_and (t->operand_types[op], mask);
5896               while (++t < current_templates->end)
5897                 {
5898                   allowed = operand_type_or (allowed, t->operand_types[op]);
5899                   allowed = operand_type_and (allowed, mask);
5900                 }
5901
5902               if (!operand_type_all_zero (&allowed))
5903                 i.types[op] = operand_type_and (i.types[op], mask);
5904             }
5905             break;
5906           }
5907       }
5908 }
5909
5910 /* Try to use the smallest displacement type too.  */
5911 static void
5912 optimize_disp (void)
5913 {
5914   int op;
5915
5916   for (op = i.operands; --op >= 0;)
5917     if (operand_type_check (i.types[op], disp))
5918       {
5919         if (i.op[op].disps->X_op == O_constant)
5920           {
5921             offsetT op_disp = i.op[op].disps->X_add_number;
5922
5923             if (!op_disp && i.types[op].bitfield.baseindex)
5924               {
5925                 i.types[op] = operand_type_and_not (i.types[op], anydisp);
5926                 i.op[op].disps = NULL;
5927                 i.disp_operands--;
5928                 continue;
5929               }
5930
5931             if (i.types[op].bitfield.disp16
5932                 && fits_in_unsigned_word (op_disp))
5933               {
5934                 /* If this operand is at most 16 bits, convert
5935                    to a signed 16 bit number and don't use 64bit
5936                    displacement.  */
5937                 op_disp = ((op_disp ^ 0x8000) - 0x8000);
5938                 i.types[op].bitfield.disp64 = 0;
5939               }
5940
5941 #ifdef BFD64
5942             /* Optimize 64-bit displacement to 32-bit for 64-bit BFD.  */
5943             if ((flag_code != CODE_64BIT
5944                  ? i.types[op].bitfield.disp32
5945                  : want_disp32 (current_templates->start)
5946                    && (!current_templates->start->opcode_modifier.jump
5947                        || i.jumpabsolute || i.types[op].bitfield.baseindex))
5948                 && fits_in_unsigned_long (op_disp))
5949               {
5950                 /* If this operand is at most 32 bits, convert
5951                    to a signed 32 bit number and don't use 64bit
5952                    displacement.  */
5953                 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
5954                 i.types[op].bitfield.disp64 = 0;
5955                 i.types[op].bitfield.disp32 = 1;
5956               }
5957
5958             if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
5959               {
5960                 i.types[op].bitfield.disp64 = 0;
5961                 i.types[op].bitfield.disp32 = 1;
5962               }
5963 #endif
5964             if ((i.types[op].bitfield.disp32
5965                  || i.types[op].bitfield.disp16)
5966                 && fits_in_disp8 (op_disp))
5967               i.types[op].bitfield.disp8 = 1;
5968
5969             i.op[op].disps->X_add_number = op_disp;
5970           }
5971         else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
5972                  || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
5973           {
5974             fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
5975                          i.op[op].disps, 0, i.reloc[op]);
5976             i.types[op] = operand_type_and_not (i.types[op], anydisp);
5977           }
5978         else
5979           /* We only support 64bit displacement on constants.  */
5980           i.types[op].bitfield.disp64 = 0;
5981       }
5982 }
5983
5984 /* Return 1 if there is a match in broadcast bytes between operand
5985    GIVEN and instruction template T.   */
5986
5987 static INLINE int
5988 match_broadcast_size (const insn_template *t, unsigned int given)
5989 {
5990   return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
5991            && i.types[given].bitfield.byte)
5992           || (t->opcode_modifier.broadcast == WORD_BROADCAST
5993               && i.types[given].bitfield.word)
5994           || (t->opcode_modifier.broadcast == DWORD_BROADCAST
5995               && i.types[given].bitfield.dword)
5996           || (t->opcode_modifier.broadcast == QWORD_BROADCAST
5997               && i.types[given].bitfield.qword));
5998 }
5999
6000 /* Check if operands are valid for the instruction.  */
6001
6002 static int
6003 check_VecOperands (const insn_template *t)
6004 {
6005   unsigned int op;
6006   i386_cpu_flags cpu;
6007
6008   /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
6009      any one operand are implicity requiring AVX512VL support if the actual
6010      operand size is YMMword or XMMword.  Since this function runs after
6011      template matching, there's no need to check for YMMword/XMMword in
6012      the template.  */
6013   cpu = cpu_flags_and (t->cpu_flags, avx512);
6014   if (!cpu_flags_all_zero (&cpu)
6015       && !t->cpu_flags.bitfield.cpuavx512vl
6016       && !cpu_arch_flags.bitfield.cpuavx512vl)
6017     {
6018       for (op = 0; op < t->operands; ++op)
6019         {
6020           if (t->operand_types[op].bitfield.zmmword
6021               && (i.types[op].bitfield.ymmword
6022                   || i.types[op].bitfield.xmmword))
6023             {
6024               i.error = unsupported;
6025               return 1;
6026             }
6027         }
6028     }
6029
6030   /* Somewhat similarly, templates specifying both AVX and AVX2 are
6031      requiring AVX2 support if the actual operand size is YMMword.  */
6032   if (t->cpu_flags.bitfield.cpuavx
6033       && t->cpu_flags.bitfield.cpuavx2
6034       && !cpu_arch_flags.bitfield.cpuavx2)
6035     {
6036       for (op = 0; op < t->operands; ++op)
6037         {
6038           if (t->operand_types[op].bitfield.xmmword
6039               && i.types[op].bitfield.ymmword)
6040             {
6041               i.error = unsupported;
6042               return 1;
6043             }
6044         }
6045     }
6046
6047   /* Without VSIB byte, we can't have a vector register for index.  */
6048   if (!t->opcode_modifier.sib
6049       && i.index_reg
6050       && (i.index_reg->reg_type.bitfield.xmmword
6051           || i.index_reg->reg_type.bitfield.ymmword
6052           || i.index_reg->reg_type.bitfield.zmmword))
6053     {
6054       i.error = unsupported_vector_index_register;
6055       return 1;
6056     }
6057
6058   /* Check if default mask is allowed.  */
6059   if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
6060       && (!i.mask.reg || i.mask.reg->reg_num == 0))
6061     {
6062       i.error = no_default_mask;
6063       return 1;
6064     }
6065
6066   /* For VSIB byte, we need a vector register for index, and all vector
6067      registers must be distinct.  */
6068   if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
6069     {
6070       if (!i.index_reg
6071           || !((t->opcode_modifier.sib == VECSIB128
6072                 && i.index_reg->reg_type.bitfield.xmmword)
6073                || (t->opcode_modifier.sib == VECSIB256
6074                    && i.index_reg->reg_type.bitfield.ymmword)
6075                || (t->opcode_modifier.sib == VECSIB512
6076                    && i.index_reg->reg_type.bitfield.zmmword)))
6077       {
6078         i.error = invalid_vsib_address;
6079         return 1;
6080       }
6081
6082       gas_assert (i.reg_operands == 2 || i.mask.reg);
6083       if (i.reg_operands == 2 && !i.mask.reg)
6084         {
6085           gas_assert (i.types[0].bitfield.class == RegSIMD);
6086           gas_assert (i.types[0].bitfield.xmmword
6087                       || i.types[0].bitfield.ymmword);
6088           gas_assert (i.types[2].bitfield.class == RegSIMD);
6089           gas_assert (i.types[2].bitfield.xmmword
6090                       || i.types[2].bitfield.ymmword);
6091           if (operand_check == check_none)
6092             return 0;
6093           if (register_number (i.op[0].regs)
6094               != register_number (i.index_reg)
6095               && register_number (i.op[2].regs)
6096                  != register_number (i.index_reg)
6097               && register_number (i.op[0].regs)
6098                  != register_number (i.op[2].regs))
6099             return 0;
6100           if (operand_check == check_error)
6101             {
6102               i.error = invalid_vector_register_set;
6103               return 1;
6104             }
6105           as_warn (_("mask, index, and destination registers should be distinct"));
6106         }
6107       else if (i.reg_operands == 1 && i.mask.reg)
6108         {
6109           if (i.types[1].bitfield.class == RegSIMD
6110               && (i.types[1].bitfield.xmmword
6111                   || i.types[1].bitfield.ymmword
6112                   || i.types[1].bitfield.zmmword)
6113               && (register_number (i.op[1].regs)
6114                   == register_number (i.index_reg)))
6115             {
6116               if (operand_check == check_error)
6117                 {
6118                   i.error = invalid_vector_register_set;
6119                   return 1;
6120                 }
6121               if (operand_check != check_none)
6122                 as_warn (_("index and destination registers should be distinct"));
6123             }
6124         }
6125     }
6126
6127   /* For AMX instructions with 3 TMM register operands, all operands
6128       must be distinct.  */
6129   if (i.reg_operands == 3
6130       && t->operand_types[0].bitfield.tmmword
6131       && (i.op[0].regs == i.op[1].regs
6132           || i.op[0].regs == i.op[2].regs
6133           || i.op[1].regs == i.op[2].regs))
6134     {
6135       i.error = invalid_tmm_register_set;
6136       return 1;
6137     }
6138
6139   /* For some special instructions require that destination must be distinct
6140      from source registers.  */
6141   if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
6142     {
6143       unsigned int dest_reg = i.operands - 1;
6144
6145       know (i.operands >= 3);
6146
6147       /* #UD if dest_reg == src1_reg or dest_reg == src2_reg.  */
6148       if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
6149           || (i.reg_operands > 2
6150               && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
6151         {
6152           i.error = invalid_dest_and_src_register_set;
6153           return 1;
6154         }
6155     }
6156
6157   /* Check if broadcast is supported by the instruction and is applied
6158      to the memory operand.  */
6159   if (i.broadcast.type || i.broadcast.bytes)
6160     {
6161       i386_operand_type type, overlap;
6162
6163       /* Check if specified broadcast is supported in this instruction,
6164          and its broadcast bytes match the memory operand.  */
6165       op = i.broadcast.operand;
6166       if (!t->opcode_modifier.broadcast
6167           || !(i.flags[op] & Operand_Mem)
6168           || (!i.types[op].bitfield.unspecified
6169               && !match_broadcast_size (t, op)))
6170         {
6171         bad_broadcast:
6172           i.error = unsupported_broadcast;
6173           return 1;
6174         }
6175
6176       if (i.broadcast.type)
6177         i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
6178                              * i.broadcast.type);
6179       operand_type_set (&type, 0);
6180       switch (get_broadcast_bytes (t, false))
6181         {
6182         case 2:
6183           type.bitfield.word = 1;
6184           break;
6185         case 4:
6186           type.bitfield.dword = 1;
6187           break;
6188         case 8:
6189           type.bitfield.qword = 1;
6190           break;
6191         case 16:
6192           type.bitfield.xmmword = 1;
6193           break;
6194         case 32:
6195           type.bitfield.ymmword = 1;
6196           break;
6197         case 64:
6198           type.bitfield.zmmword = 1;
6199           break;
6200         default:
6201           goto bad_broadcast;
6202         }
6203
6204       overlap = operand_type_and (type, t->operand_types[op]);
6205       if (t->operand_types[op].bitfield.class == RegSIMD
6206           && t->operand_types[op].bitfield.byte
6207              + t->operand_types[op].bitfield.word
6208              + t->operand_types[op].bitfield.dword
6209              + t->operand_types[op].bitfield.qword > 1)
6210         {
6211           overlap.bitfield.xmmword = 0;
6212           overlap.bitfield.ymmword = 0;
6213           overlap.bitfield.zmmword = 0;
6214         }
6215       if (operand_type_all_zero (&overlap))
6216           goto bad_broadcast;
6217
6218       if (t->opcode_modifier.checkregsize)
6219         {
6220           unsigned int j;
6221
6222           type.bitfield.baseindex = 1;
6223           for (j = 0; j < i.operands; ++j)
6224             {
6225               if (j != op
6226                   && !operand_type_register_match(i.types[j],
6227                                                   t->operand_types[j],
6228                                                   type,
6229                                                   t->operand_types[op]))
6230                 goto bad_broadcast;
6231             }
6232         }
6233     }
6234   /* If broadcast is supported in this instruction, we need to check if
6235      operand of one-element size isn't specified without broadcast.  */
6236   else if (t->opcode_modifier.broadcast && i.mem_operands)
6237     {
6238       /* Find memory operand.  */
6239       for (op = 0; op < i.operands; op++)
6240         if (i.flags[op] & Operand_Mem)
6241           break;
6242       gas_assert (op < i.operands);
6243       /* Check size of the memory operand.  */
6244       if (match_broadcast_size (t, op))
6245         {
6246           i.error = broadcast_needed;
6247           return 1;
6248         }
6249     }
6250   else
6251     op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning.  */
6252
6253   /* Check if requested masking is supported.  */
6254   if (i.mask.reg)
6255     {
6256       switch (t->opcode_modifier.masking)
6257         {
6258         case BOTH_MASKING:
6259           break;
6260         case MERGING_MASKING:
6261           if (i.mask.zeroing)
6262             {
6263         case 0:
6264               i.error = unsupported_masking;
6265               return 1;
6266             }
6267           break;
6268         case DYNAMIC_MASKING:
6269           /* Memory destinations allow only merging masking.  */
6270           if (i.mask.zeroing && i.mem_operands)
6271             {
6272               /* Find memory operand.  */
6273               for (op = 0; op < i.operands; op++)
6274                 if (i.flags[op] & Operand_Mem)
6275                   break;
6276               gas_assert (op < i.operands);
6277               if (op == i.operands - 1)
6278                 {
6279                   i.error = unsupported_masking;
6280                   return 1;
6281                 }
6282             }
6283           break;
6284         default:
6285           abort ();
6286         }
6287     }
6288
6289   /* Check if masking is applied to dest operand.  */
6290   if (i.mask.reg && (i.mask.operand != i.operands - 1))
6291     {
6292       i.error = mask_not_on_destination;
6293       return 1;
6294     }
6295
6296   /* Check RC/SAE.  */
6297   if (i.rounding.type != rc_none)
6298     {
6299       if (!t->opcode_modifier.sae
6300           || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
6301           || i.mem_operands)
6302         {
6303           i.error = unsupported_rc_sae;
6304           return 1;
6305         }
6306
6307       /* Non-EVEX.LIG forms need to have a ZMM register as at least one
6308          operand.  */
6309       if (t->opcode_modifier.evex != EVEXLIG)
6310         {
6311           for (op = 0; op < t->operands; ++op)
6312             if (i.types[op].bitfield.zmmword)
6313               break;
6314           if (op >= t->operands)
6315             {
6316               i.error = operand_size_mismatch;
6317               return 1;
6318             }
6319         }
6320     }
6321
6322   /* Check the special Imm4 cases; must be the first operand.  */
6323   if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
6324     {
6325       if (i.op[0].imms->X_op != O_constant
6326           || !fits_in_imm4 (i.op[0].imms->X_add_number))
6327         {
6328           i.error = bad_imm4;
6329           return 1;
6330         }
6331
6332       /* Turn off Imm<N> so that update_imm won't complain.  */
6333       operand_type_set (&i.types[0], 0);
6334     }
6335
6336   /* Check vector Disp8 operand.  */
6337   if (t->opcode_modifier.disp8memshift
6338       && i.disp_encoding <= disp_encoding_8bit)
6339     {
6340       if (i.broadcast.bytes)
6341         i.memshift = t->opcode_modifier.broadcast - 1;
6342       else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6343         i.memshift = t->opcode_modifier.disp8memshift;
6344       else
6345         {
6346           const i386_operand_type *type = NULL, *fallback = NULL;
6347
6348           i.memshift = 0;
6349           for (op = 0; op < i.operands; op++)
6350             if (i.flags[op] & Operand_Mem)
6351               {
6352                 if (t->opcode_modifier.evex == EVEXLIG)
6353                   i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6354                 else if (t->operand_types[op].bitfield.xmmword
6355                          + t->operand_types[op].bitfield.ymmword
6356                          + t->operand_types[op].bitfield.zmmword <= 1)
6357                   type = &t->operand_types[op];
6358                 else if (!i.types[op].bitfield.unspecified)
6359                   type = &i.types[op];
6360                 else /* Ambiguities get resolved elsewhere.  */
6361                   fallback = &t->operand_types[op];
6362               }
6363             else if (i.types[op].bitfield.class == RegSIMD
6364                      && t->opcode_modifier.evex != EVEXLIG)
6365               {
6366                 if (i.types[op].bitfield.zmmword)
6367                   i.memshift = 6;
6368                 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6369                   i.memshift = 5;
6370                 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6371                   i.memshift = 4;
6372               }
6373
6374           if (!type && !i.memshift)
6375             type = fallback;
6376           if (type)
6377             {
6378               if (type->bitfield.zmmword)
6379                 i.memshift = 6;
6380               else if (type->bitfield.ymmword)
6381                 i.memshift = 5;
6382               else if (type->bitfield.xmmword)
6383                 i.memshift = 4;
6384             }
6385
6386           /* For the check in fits_in_disp8().  */
6387           if (i.memshift == 0)
6388             i.memshift = -1;
6389         }
6390
6391       for (op = 0; op < i.operands; op++)
6392         if (operand_type_check (i.types[op], disp)
6393             && i.op[op].disps->X_op == O_constant)
6394           {
6395             if (fits_in_disp8 (i.op[op].disps->X_add_number))
6396               {
6397                 i.types[op].bitfield.disp8 = 1;
6398                 return 0;
6399               }
6400             i.types[op].bitfield.disp8 = 0;
6401           }
6402     }
6403
6404   i.memshift = 0;
6405
6406   return 0;
6407 }
6408
6409 /* Check if encoding requirements are met by the instruction.  */
6410
6411 static int
6412 VEX_check_encoding (const insn_template *t)
6413 {
6414   if (i.vec_encoding == vex_encoding_error)
6415     {
6416       i.error = unsupported;
6417       return 1;
6418     }
6419
6420   if (i.vec_encoding == vex_encoding_evex)
6421     {
6422       /* This instruction must be encoded with EVEX prefix.  */
6423       if (!is_evex_encoding (t))
6424         {
6425           i.error = unsupported;
6426           return 1;
6427         }
6428       return 0;
6429     }
6430
6431   if (!t->opcode_modifier.vex)
6432     {
6433       /* This instruction template doesn't have VEX prefix.  */
6434       if (i.vec_encoding != vex_encoding_default)
6435         {
6436           i.error = unsupported;
6437           return 1;
6438         }
6439       return 0;
6440     }
6441
6442   return 0;
6443 }
6444
6445 /* Helper function for the progress() macro in match_template().  */
6446 static INLINE enum i386_error progress (enum i386_error new,
6447                                         enum i386_error last,
6448                                         unsigned int line, unsigned int *line_p)
6449 {
6450   if (line <= *line_p)
6451     return last;
6452   *line_p = line;
6453   return new;
6454 }
6455
6456 static const insn_template *
6457 match_template (char mnem_suffix)
6458 {
6459   /* Points to template once we've found it.  */
6460   const insn_template *t;
6461   i386_operand_type overlap0, overlap1, overlap2, overlap3;
6462   i386_operand_type overlap4;
6463   unsigned int found_reverse_match;
6464   i386_opcode_modifier suffix_check;
6465   i386_operand_type operand_types [MAX_OPERANDS];
6466   int addr_prefix_disp;
6467   unsigned int j, size_match, check_register, errline = __LINE__;
6468   enum i386_error specific_error = number_of_operands_mismatch;
6469 #define progress(err) progress (err, specific_error, __LINE__, &errline)
6470
6471 #if MAX_OPERANDS != 5
6472 # error "MAX_OPERANDS must be 5."
6473 #endif
6474
6475   found_reverse_match = 0;
6476   addr_prefix_disp = -1;
6477
6478   /* Prepare for mnemonic suffix check.  */
6479   memset (&suffix_check, 0, sizeof (suffix_check));
6480   switch (mnem_suffix)
6481     {
6482     case BYTE_MNEM_SUFFIX:
6483       suffix_check.no_bsuf = 1;
6484       break;
6485     case WORD_MNEM_SUFFIX:
6486       suffix_check.no_wsuf = 1;
6487       break;
6488     case SHORT_MNEM_SUFFIX:
6489       suffix_check.no_ssuf = 1;
6490       break;
6491     case LONG_MNEM_SUFFIX:
6492       suffix_check.no_lsuf = 1;
6493       break;
6494     case QWORD_MNEM_SUFFIX:
6495       suffix_check.no_qsuf = 1;
6496       break;
6497     default:
6498       /* NB: In Intel syntax, normally we can check for memory operand
6499          size when there is no mnemonic suffix.  But jmp and call have
6500          2 different encodings with Dword memory operand size, one with
6501          No_ldSuf and the other without.  i.suffix is set to
6502          LONG_DOUBLE_MNEM_SUFFIX to skip the one with No_ldSuf.  */
6503       if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX)
6504         suffix_check.no_ldsuf = 1;
6505     }
6506
6507   for (t = current_templates->start; t < current_templates->end; t++)
6508     {
6509       addr_prefix_disp = -1;
6510       found_reverse_match = 0;
6511
6512       /* Must have right number of operands.  */
6513       if (i.operands != t->operands)
6514         continue;
6515
6516       /* Check processor support.  */
6517       specific_error = progress (unsupported);
6518       if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6519         continue;
6520
6521       /* Check AT&T mnemonic.   */
6522       specific_error = progress (unsupported_with_intel_mnemonic);
6523       if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6524         continue;
6525
6526       /* Check AT&T/Intel syntax.  */
6527       specific_error = progress (unsupported_syntax);
6528       if ((intel_syntax && t->opcode_modifier.attsyntax)
6529           || (!intel_syntax && t->opcode_modifier.intelsyntax))
6530         continue;
6531
6532       /* Check Intel64/AMD64 ISA.   */
6533       switch (isa64)
6534         {
6535         default:
6536           /* Default: Don't accept Intel64.  */
6537           if (t->opcode_modifier.isa64 == INTEL64)
6538             continue;
6539           break;
6540         case amd64:
6541           /* -mamd64: Don't accept Intel64 and Intel64 only.  */
6542           if (t->opcode_modifier.isa64 >= INTEL64)
6543             continue;
6544           break;
6545         case intel64:
6546           /* -mintel64: Don't accept AMD64.  */
6547           if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6548             continue;
6549           break;
6550         }
6551
6552       /* Check the suffix.  */
6553       specific_error = progress (invalid_instruction_suffix);
6554       if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
6555           || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf)
6556           || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf)
6557           || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf)
6558           || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf)
6559           || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf))
6560         continue;
6561
6562       specific_error = progress (operand_size_mismatch);
6563       size_match = operand_size_match (t);
6564       if (!size_match)
6565         continue;
6566
6567       /* This is intentionally not
6568
6569          if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6570
6571          as the case of a missing * on the operand is accepted (perhaps with
6572          a warning, issued further down).  */
6573       specific_error = progress (operand_type_mismatch);
6574       if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6575         continue;
6576
6577       for (j = 0; j < MAX_OPERANDS; j++)
6578         operand_types[j] = t->operand_types[j];
6579
6580       /* In general, don't allow
6581          - 64-bit operands outside of 64-bit mode,
6582          - 32-bit operands on pre-386.  */
6583       specific_error = progress (mnem_suffix ? invalid_instruction_suffix
6584                                              : operand_size_mismatch);
6585       j = i.imm_operands + (t->operands > i.imm_operands + 1);
6586       if (((i.suffix == QWORD_MNEM_SUFFIX
6587             && flag_code != CODE_64BIT
6588             && !(t->opcode_modifier.opcodespace == SPACE_0F
6589                  && t->base_opcode == 0xc7
6590                  && t->opcode_modifier.opcodeprefix == PREFIX_NONE
6591                  && t->extension_opcode == 1) /* cmpxchg8b */)
6592            || (i.suffix == LONG_MNEM_SUFFIX
6593                && !cpu_arch_flags.bitfield.cpui386))
6594           && (intel_syntax
6595               ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6596                  && !intel_float_operand (t->name))
6597               : intel_float_operand (t->name) != 2)
6598           && (t->operands == i.imm_operands
6599               || (operand_types[i.imm_operands].bitfield.class != RegMMX
6600                && operand_types[i.imm_operands].bitfield.class != RegSIMD
6601                && operand_types[i.imm_operands].bitfield.class != RegMask)
6602               || (operand_types[j].bitfield.class != RegMMX
6603                   && operand_types[j].bitfield.class != RegSIMD
6604                   && operand_types[j].bitfield.class != RegMask))
6605           && !t->opcode_modifier.sib)
6606         continue;
6607
6608       /* Do not verify operands when there are none.  */
6609       if (!t->operands)
6610         {
6611           if (VEX_check_encoding (t))
6612             {
6613               specific_error = progress (i.error);
6614               continue;
6615             }
6616
6617           /* We've found a match; break out of loop.  */
6618           break;
6619         }
6620
6621       if (!t->opcode_modifier.jump
6622           || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6623         {
6624           /* There should be only one Disp operand.  */
6625           for (j = 0; j < MAX_OPERANDS; j++)
6626             if (operand_type_check (operand_types[j], disp))
6627               break;
6628           if (j < MAX_OPERANDS)
6629             {
6630               bool override = (i.prefix[ADDR_PREFIX] != 0);
6631
6632               addr_prefix_disp = j;
6633
6634               /* Address size prefix will turn Disp64 operand into Disp32 and
6635                  Disp32/Disp16 one into Disp16/Disp32 respectively.  */
6636               switch (flag_code)
6637                 {
6638                 case CODE_16BIT:
6639                   override = !override;
6640                   /* Fall through.  */
6641                 case CODE_32BIT:
6642                   if (operand_types[j].bitfield.disp32
6643                       && operand_types[j].bitfield.disp16)
6644                     {
6645                       operand_types[j].bitfield.disp16 = override;
6646                       operand_types[j].bitfield.disp32 = !override;
6647                     }
6648                   gas_assert (!operand_types[j].bitfield.disp64);
6649                   break;
6650
6651                 case CODE_64BIT:
6652                   if (operand_types[j].bitfield.disp64)
6653                     {
6654                       gas_assert (!operand_types[j].bitfield.disp32);
6655                       operand_types[j].bitfield.disp32 = override;
6656                       operand_types[j].bitfield.disp64 = !override;
6657                     }
6658                   operand_types[j].bitfield.disp16 = 0;
6659                   break;
6660                 }
6661             }
6662         }
6663
6664       switch (i.reloc[0])
6665         {
6666         case BFD_RELOC_386_GOT32:
6667           /* Force 0x8b encoding for "mov foo@GOT, %eax".  */
6668           if (t->base_opcode == 0xa0
6669               && t->opcode_modifier.opcodespace == SPACE_BASE)
6670             continue;
6671           break;
6672         case BFD_RELOC_386_TLS_GOTIE:
6673         case BFD_RELOC_386_TLS_LE_32:
6674         case BFD_RELOC_X86_64_GOTTPOFF:
6675         case BFD_RELOC_X86_64_TLSLD:
6676           /* Don't allow KMOV in TLS code sequences.  */
6677           if (t->opcode_modifier.vex)
6678             continue;
6679           break;
6680         default:
6681           break;
6682         }
6683
6684       /* We check register size if needed.  */
6685       if (t->opcode_modifier.checkregsize)
6686         {
6687           check_register = (1 << t->operands) - 1;
6688           if (i.broadcast.type || i.broadcast.bytes)
6689             check_register &= ~(1 << i.broadcast.operand);
6690         }
6691       else
6692         check_register = 0;
6693
6694       overlap0 = operand_type_and (i.types[0], operand_types[0]);
6695       switch (t->operands)
6696         {
6697         case 1:
6698           if (!operand_type_match (overlap0, i.types[0]))
6699             continue;
6700           break;
6701         case 2:
6702           /* xchg %eax, %eax is a special case. It is an alias for nop
6703              only in 32bit mode and we can use opcode 0x90.  In 64bit
6704              mode, we can't use 0x90 for xchg %eax, %eax since it should
6705              zero-extend %eax to %rax.  */
6706           if (flag_code == CODE_64BIT
6707               && t->base_opcode == 0x90
6708               && t->opcode_modifier.opcodespace == SPACE_BASE
6709               && i.types[0].bitfield.instance == Accum
6710               && i.types[0].bitfield.dword
6711               && i.types[1].bitfield.instance == Accum
6712               && i.types[1].bitfield.dword)
6713             continue;
6714           /* xrelease mov %eax, <disp> is another special case. It must not
6715              match the accumulator-only encoding of mov.  */
6716           if (flag_code != CODE_64BIT
6717               && i.hle_prefix
6718               && t->base_opcode == 0xa0
6719               && t->opcode_modifier.opcodespace == SPACE_BASE
6720               && i.types[0].bitfield.instance == Accum
6721               && (i.flags[1] & Operand_Mem))
6722             continue;
6723           /* Fall through.  */
6724
6725         case 3:
6726           if (!(size_match & MATCH_STRAIGHT))
6727             goto check_reverse;
6728           /* Reverse direction of operands if swapping is possible in the first
6729              place (operands need to be symmetric) and
6730              - the load form is requested, and the template is a store form,
6731              - the store form is requested, and the template is a load form,
6732              - the non-default (swapped) form is requested.  */
6733           overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6734           if (t->opcode_modifier.d && i.reg_operands == i.operands
6735               && !operand_type_all_zero (&overlap1))
6736             switch (i.dir_encoding)
6737               {
6738               case dir_encoding_load:
6739                 if (operand_type_check (operand_types[i.operands - 1], anymem)
6740                     || t->opcode_modifier.regmem)
6741                   goto check_reverse;
6742                 break;
6743
6744               case dir_encoding_store:
6745                 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6746                     && !t->opcode_modifier.regmem)
6747                   goto check_reverse;
6748                 break;
6749
6750               case dir_encoding_swap:
6751                 goto check_reverse;
6752
6753               case dir_encoding_default:
6754                 break;
6755               }
6756           /* If we want store form, we skip the current load.  */
6757           if ((i.dir_encoding == dir_encoding_store
6758                || i.dir_encoding == dir_encoding_swap)
6759               && i.mem_operands == 0
6760               && t->opcode_modifier.load)
6761             continue;
6762           /* Fall through.  */
6763         case 4:
6764         case 5:
6765           overlap1 = operand_type_and (i.types[1], operand_types[1]);
6766           if (!operand_type_match (overlap0, i.types[0])
6767               || !operand_type_match (overlap1, i.types[1])
6768               || ((check_register & 3) == 3
6769                   && !operand_type_register_match (i.types[0],
6770                                                    operand_types[0],
6771                                                    i.types[1],
6772                                                    operand_types[1])))
6773             {
6774               specific_error = progress (i.error);
6775
6776               /* Check if other direction is valid ...  */
6777               if (!t->opcode_modifier.d)
6778                 continue;
6779
6780             check_reverse:
6781               if (!(size_match & MATCH_REVERSE))
6782                 continue;
6783               /* Try reversing direction of operands.  */
6784               j = t->opcode_modifier.vexsources ? 1 : i.operands - 1;
6785               overlap0 = operand_type_and (i.types[0], operand_types[j]);
6786               overlap1 = operand_type_and (i.types[j], operand_types[0]);
6787               overlap2 = operand_type_and (i.types[1], operand_types[1]);
6788               gas_assert (t->operands != 3 || !check_register);
6789               if (!operand_type_match (overlap0, i.types[0])
6790                   || !operand_type_match (overlap1, i.types[j])
6791                   || (t->operands == 3
6792                       && !operand_type_match (overlap2, i.types[1]))
6793                   || (check_register
6794                       && !operand_type_register_match (i.types[0],
6795                                                        operand_types[j],
6796                                                        i.types[j],
6797                                                        operand_types[0])))
6798                 {
6799                   /* Does not match either direction.  */
6800                   specific_error = progress (i.error);
6801                   continue;
6802                 }
6803               /* found_reverse_match holds which variant of D
6804                  we've found.  */
6805               if (!t->opcode_modifier.d)
6806                 found_reverse_match = 0;
6807               else if (operand_types[0].bitfield.tbyte)
6808                 {
6809                   found_reverse_match = Opcode_FloatD;
6810                   /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped.  */
6811                   if ((t->base_opcode & 0x20)
6812                       && (intel_syntax || intel_mnemonic))
6813                     found_reverse_match |= Opcode_FloatR;
6814                 }
6815               else if (t->opcode_modifier.vexsources)
6816                 {
6817                   found_reverse_match = Opcode_VexW;
6818                   goto check_operands_345;
6819                 }
6820               else if (t->opcode_modifier.opcodespace != SPACE_BASE
6821                        && (t->opcode_modifier.opcodespace != SPACE_0F
6822                            /* MOV to/from CR/DR/TR, as an exception, follow
6823                               the base opcode space encoding model.  */
6824                            || (t->base_opcode | 7) != 0x27))
6825                 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
6826                                       ? Opcode_ExtD : Opcode_SIMD_IntD;
6827               else
6828                 found_reverse_match = Opcode_D;
6829             }
6830           else
6831             {
6832               /* Found a forward 2 operand match here.  */
6833             check_operands_345:
6834               switch (t->operands)
6835                 {
6836                 case 5:
6837                   overlap4 = operand_type_and (i.types[4], operand_types[4]);
6838                   if (!operand_type_match (overlap4, i.types[4])
6839                       || !operand_type_register_match (i.types[3],
6840                                                        operand_types[3],
6841                                                        i.types[4],
6842                                                        operand_types[4]))
6843                     {
6844                       specific_error = progress (i.error);
6845                       continue;
6846                     }
6847                   /* Fall through.  */
6848                 case 4:
6849                   overlap3 = operand_type_and (i.types[3], operand_types[3]);
6850                   if (!operand_type_match (overlap3, i.types[3])
6851                       || ((check_register & 0xa) == 0xa
6852                           && !operand_type_register_match (i.types[1],
6853                                                             operand_types[1],
6854                                                             i.types[3],
6855                                                             operand_types[3]))
6856                       || ((check_register & 0xc) == 0xc
6857                           && !operand_type_register_match (i.types[2],
6858                                                             operand_types[2],
6859                                                             i.types[3],
6860                                                             operand_types[3])))
6861                     {
6862                       specific_error = progress (i.error);
6863                       continue;
6864                     }
6865                   /* Fall through.  */
6866                 case 3:
6867                   overlap2 = operand_type_and (i.types[2], operand_types[2]);
6868                   if (!operand_type_match (overlap2, i.types[2])
6869                       || ((check_register & 5) == 5
6870                           && !operand_type_register_match (i.types[0],
6871                                                             operand_types[0],
6872                                                             i.types[2],
6873                                                             operand_types[2]))
6874                       || ((check_register & 6) == 6
6875                           && !operand_type_register_match (i.types[1],
6876                                                             operand_types[1],
6877                                                             i.types[2],
6878                                                             operand_types[2])))
6879                     {
6880                       specific_error = progress (i.error);
6881                       continue;
6882                     }
6883                   break;
6884                 }
6885             }
6886           /* Found either forward/reverse 2, 3 or 4 operand match here:
6887              slip through to break.  */
6888         }
6889
6890       /* Check if VEX/EVEX encoding requirements can be satisfied.  */
6891       if (VEX_check_encoding (t))
6892         {
6893           specific_error = progress (i.error);
6894           continue;
6895         }
6896
6897       /* Check if vector operands are valid.  */
6898       if (check_VecOperands (t))
6899         {
6900           specific_error = progress (i.error);
6901           continue;
6902         }
6903
6904       /* We've found a match; break out of loop.  */
6905       break;
6906     }
6907
6908 #undef progress
6909
6910   if (t == current_templates->end)
6911     {
6912       /* We found no match.  */
6913       const char *err_msg;
6914       switch (specific_error)
6915         {
6916         default:
6917           abort ();
6918         case operand_size_mismatch:
6919           err_msg = _("operand size mismatch");
6920           break;
6921         case operand_type_mismatch:
6922           err_msg = _("operand type mismatch");
6923           break;
6924         case register_type_mismatch:
6925           err_msg = _("register type mismatch");
6926           break;
6927         case number_of_operands_mismatch:
6928           err_msg = _("number of operands mismatch");
6929           break;
6930         case invalid_instruction_suffix:
6931           err_msg = _("invalid instruction suffix");
6932           break;
6933         case bad_imm4:
6934           err_msg = _("constant doesn't fit in 4 bits");
6935           break;
6936         case unsupported_with_intel_mnemonic:
6937           err_msg = _("unsupported with Intel mnemonic");
6938           break;
6939         case unsupported_syntax:
6940           err_msg = _("unsupported syntax");
6941           break;
6942         case unsupported:
6943           as_bad (_("unsupported instruction `%s'"),
6944                   current_templates->start->name);
6945           return NULL;
6946         case invalid_sib_address:
6947           err_msg = _("invalid SIB address");
6948           break;
6949         case invalid_vsib_address:
6950           err_msg = _("invalid VSIB address");
6951           break;
6952         case invalid_vector_register_set:
6953           err_msg = _("mask, index, and destination registers must be distinct");
6954           break;
6955         case invalid_tmm_register_set:
6956           err_msg = _("all tmm registers must be distinct");
6957           break;
6958         case invalid_dest_and_src_register_set:
6959           err_msg = _("destination and source registers must be distinct");
6960           break;
6961         case unsupported_vector_index_register:
6962           err_msg = _("unsupported vector index register");
6963           break;
6964         case unsupported_broadcast:
6965           err_msg = _("unsupported broadcast");
6966           break;
6967         case broadcast_needed:
6968           err_msg = _("broadcast is needed for operand of such type");
6969           break;
6970         case unsupported_masking:
6971           err_msg = _("unsupported masking");
6972           break;
6973         case mask_not_on_destination:
6974           err_msg = _("mask not on destination operand");
6975           break;
6976         case no_default_mask:
6977           err_msg = _("default mask isn't allowed");
6978           break;
6979         case unsupported_rc_sae:
6980           err_msg = _("unsupported static rounding/sae");
6981           break;
6982         case invalid_register_operand:
6983           err_msg = _("invalid register operand");
6984           break;
6985         }
6986       as_bad (_("%s for `%s'"), err_msg,
6987               current_templates->start->name);
6988       return NULL;
6989     }
6990
6991   if (!quiet_warnings)
6992     {
6993       if (!intel_syntax
6994           && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
6995         as_warn (_("indirect %s without `*'"), t->name);
6996
6997       if (t->opcode_modifier.isprefix
6998           && t->opcode_modifier.mnemonicsize == IGNORESIZE)
6999         {
7000           /* Warn them that a data or address size prefix doesn't
7001              affect assembly of the next line of code.  */
7002           as_warn (_("stand-alone `%s' prefix"), t->name);
7003         }
7004     }
7005
7006   /* Copy the template we found.  */
7007   install_template (t);
7008
7009   if (addr_prefix_disp != -1)
7010     i.tm.operand_types[addr_prefix_disp]
7011       = operand_types[addr_prefix_disp];
7012
7013   switch (found_reverse_match)
7014     {
7015     case 0:
7016       break;
7017
7018     default:
7019       /* If we found a reverse match we must alter the opcode direction
7020          bit and clear/flip the regmem modifier one.  found_reverse_match
7021          holds bits to change (different for int & float insns).  */
7022
7023       i.tm.base_opcode ^= found_reverse_match;
7024
7025       i.tm.operand_types[0] = operand_types[i.operands - 1];
7026       i.tm.operand_types[i.operands - 1] = operand_types[0];
7027
7028       /* Certain SIMD insns have their load forms specified in the opcode
7029          table, and hence we need to _set_ RegMem instead of clearing it.
7030          We need to avoid setting the bit though on insns like KMOVW.  */
7031       i.tm.opcode_modifier.regmem
7032         = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
7033           && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
7034           && !i.tm.opcode_modifier.regmem;
7035       break;
7036
7037     case Opcode_VexW:
7038       /* Only the first two register operands need reversing, alongside
7039          flipping VEX.W.  */
7040       i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
7041
7042       j = i.tm.operand_types[0].bitfield.imm8;
7043       i.tm.operand_types[j] = operand_types[j + 1];
7044       i.tm.operand_types[j + 1] = operand_types[j];
7045       break;
7046     }
7047
7048   return t;
7049 }
7050
7051 static int
7052 check_string (void)
7053 {
7054   unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
7055   unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
7056
7057   if (i.seg[op] != NULL && i.seg[op] != reg_es)
7058     {
7059       as_bad (_("`%s' operand %u must use `%ses' segment"),
7060               i.tm.name,
7061               intel_syntax ? i.tm.operands - es_op : es_op + 1,
7062               register_prefix);
7063       return 0;
7064     }
7065
7066   /* There's only ever one segment override allowed per instruction.
7067      This instruction possibly has a legal segment override on the
7068      second operand, so copy the segment to where non-string
7069      instructions store it, allowing common code.  */
7070   i.seg[op] = i.seg[1];
7071
7072   return 1;
7073 }
7074
7075 static int
7076 process_suffix (void)
7077 {
7078   bool is_crc32 = false, is_movx = false;
7079
7080   /* If matched instruction specifies an explicit instruction mnemonic
7081      suffix, use it.  */
7082   if (i.tm.opcode_modifier.size == SIZE16)
7083     i.suffix = WORD_MNEM_SUFFIX;
7084   else if (i.tm.opcode_modifier.size == SIZE32)
7085     i.suffix = LONG_MNEM_SUFFIX;
7086   else if (i.tm.opcode_modifier.size == SIZE64)
7087     i.suffix = QWORD_MNEM_SUFFIX;
7088   else if (i.reg_operands
7089            && (i.operands > 1 || i.types[0].bitfield.class == Reg)
7090            && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
7091     {
7092       unsigned int numop = i.operands;
7093
7094       /* MOVSX/MOVZX */
7095       is_movx = (i.tm.opcode_modifier.opcodespace == SPACE_0F
7096                  && (i.tm.base_opcode | 8) == 0xbe)
7097                 || (i.tm.opcode_modifier.opcodespace == SPACE_BASE
7098                     && i.tm.base_opcode == 0x63
7099                     && i.tm.cpu_flags.bitfield.cpu64);
7100
7101       /* CRC32 */
7102       is_crc32 = (i.tm.base_opcode == 0xf0
7103                   && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7104                   && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2);
7105
7106       /* movsx/movzx want only their source operand considered here, for the
7107          ambiguity checking below.  The suffix will be replaced afterwards
7108          to represent the destination (register).  */
7109       if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
7110         --i.operands;
7111
7112       /* crc32 needs REX.W set regardless of suffix / source operand size.  */
7113       if (is_crc32 && i.tm.operand_types[1].bitfield.qword)
7114         i.rex |= REX_W;
7115
7116       /* If there's no instruction mnemonic suffix we try to invent one
7117          based on GPR operands.  */
7118       if (!i.suffix)
7119         {
7120           /* We take i.suffix from the last register operand specified,
7121              Destination register type is more significant than source
7122              register type.  crc32 in SSE4.2 prefers source register
7123              type. */
7124           unsigned int op = is_crc32 ? 1 : i.operands;
7125
7126           while (op--)
7127             if (i.tm.operand_types[op].bitfield.instance == InstanceNone
7128                 || i.tm.operand_types[op].bitfield.instance == Accum)
7129               {
7130                 if (i.types[op].bitfield.class != Reg)
7131                   continue;
7132                 if (i.types[op].bitfield.byte)
7133                   i.suffix = BYTE_MNEM_SUFFIX;
7134                 else if (i.types[op].bitfield.word)
7135                   i.suffix = WORD_MNEM_SUFFIX;
7136                 else if (i.types[op].bitfield.dword)
7137                   i.suffix = LONG_MNEM_SUFFIX;
7138                 else if (i.types[op].bitfield.qword)
7139                   i.suffix = QWORD_MNEM_SUFFIX;
7140                 else
7141                   continue;
7142                 break;
7143               }
7144
7145           /* As an exception, movsx/movzx silently default to a byte source
7146              in AT&T mode.  */
7147           if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
7148             i.suffix = BYTE_MNEM_SUFFIX;
7149         }
7150       else if (i.suffix == BYTE_MNEM_SUFFIX)
7151         {
7152           if (!check_byte_reg ())
7153             return 0;
7154         }
7155       else if (i.suffix == LONG_MNEM_SUFFIX)
7156         {
7157           if (!check_long_reg ())
7158             return 0;
7159         }
7160       else if (i.suffix == QWORD_MNEM_SUFFIX)
7161         {
7162           if (!check_qword_reg ())
7163             return 0;
7164         }
7165       else if (i.suffix == WORD_MNEM_SUFFIX)
7166         {
7167           if (!check_word_reg ())
7168             return 0;
7169         }
7170       else if (intel_syntax
7171                && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
7172         /* Do nothing if the instruction is going to ignore the prefix.  */
7173         ;
7174       else
7175         abort ();
7176
7177       /* Undo the movsx/movzx change done above.  */
7178       i.operands = numop;
7179     }
7180   else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
7181            && !i.suffix)
7182     {
7183       i.suffix = stackop_size;
7184       if (stackop_size == LONG_MNEM_SUFFIX)
7185         {
7186           /* stackop_size is set to LONG_MNEM_SUFFIX for the
7187              .code16gcc directive to support 16-bit mode with
7188              32-bit address.  For IRET without a suffix, generate
7189              16-bit IRET (opcode 0xcf) to return from an interrupt
7190              handler.  */
7191           if (i.tm.base_opcode == 0xcf)
7192             {
7193               i.suffix = WORD_MNEM_SUFFIX;
7194               as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
7195             }
7196           /* Warn about changed behavior for segment register push/pop.  */
7197           else if ((i.tm.base_opcode | 1) == 0x07)
7198             as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
7199                      i.tm.name);
7200         }
7201     }
7202   else if (!i.suffix
7203            && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
7204                || i.tm.opcode_modifier.jump == JUMP_BYTE
7205                || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
7206                || (i.tm.opcode_modifier.opcodespace == SPACE_0F
7207                    && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
7208                    && i.tm.extension_opcode <= 3)))
7209     {
7210       switch (flag_code)
7211         {
7212         case CODE_64BIT:
7213           if (!i.tm.opcode_modifier.no_qsuf)
7214             {
7215               if (i.tm.opcode_modifier.jump == JUMP_BYTE
7216                   || i.tm.opcode_modifier.no_lsuf)
7217                 i.suffix = QWORD_MNEM_SUFFIX;
7218               break;
7219             }
7220           /* Fall through.  */
7221         case CODE_32BIT:
7222           if (!i.tm.opcode_modifier.no_lsuf)
7223             i.suffix = LONG_MNEM_SUFFIX;
7224           break;
7225         case CODE_16BIT:
7226           if (!i.tm.opcode_modifier.no_wsuf)
7227             i.suffix = WORD_MNEM_SUFFIX;
7228           break;
7229         }
7230     }
7231
7232   if (!i.suffix
7233       && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7234           /* Also cover lret/retf/iret in 64-bit mode.  */
7235           || (flag_code == CODE_64BIT
7236               && !i.tm.opcode_modifier.no_lsuf
7237               && !i.tm.opcode_modifier.no_qsuf))
7238       && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7239       /* Explicit sizing prefixes are assumed to disambiguate insns.  */
7240       && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
7241       /* Accept FLDENV et al without suffix.  */
7242       && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
7243     {
7244       unsigned int suffixes, evex = 0;
7245
7246       suffixes = !i.tm.opcode_modifier.no_bsuf;
7247       if (!i.tm.opcode_modifier.no_wsuf)
7248         suffixes |= 1 << 1;
7249       if (!i.tm.opcode_modifier.no_lsuf)
7250         suffixes |= 1 << 2;
7251       if (!i.tm.opcode_modifier.no_ldsuf)
7252         suffixes |= 1 << 3;
7253       if (!i.tm.opcode_modifier.no_ssuf)
7254         suffixes |= 1 << 4;
7255       if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
7256         suffixes |= 1 << 5;
7257
7258       /* For [XYZ]MMWORD operands inspect operand sizes.  While generally
7259          also suitable for AT&T syntax mode, it was requested that this be
7260          restricted to just Intel syntax.  */
7261       if (intel_syntax && is_any_vex_encoding (&i.tm)
7262           && !i.broadcast.type && !i.broadcast.bytes)
7263         {
7264           unsigned int op;
7265
7266           for (op = 0; op < i.tm.operands; ++op)
7267             {
7268               if (is_evex_encoding (&i.tm)
7269                   && !cpu_arch_flags.bitfield.cpuavx512vl)
7270                 {
7271                   if (i.tm.operand_types[op].bitfield.ymmword)
7272                     i.tm.operand_types[op].bitfield.xmmword = 0;
7273                   if (i.tm.operand_types[op].bitfield.zmmword)
7274                     i.tm.operand_types[op].bitfield.ymmword = 0;
7275                   if (!i.tm.opcode_modifier.evex
7276                       || i.tm.opcode_modifier.evex == EVEXDYN)
7277                     i.tm.opcode_modifier.evex = EVEX512;
7278                 }
7279
7280               if (i.tm.operand_types[op].bitfield.xmmword
7281                   + i.tm.operand_types[op].bitfield.ymmword
7282                   + i.tm.operand_types[op].bitfield.zmmword < 2)
7283                 continue;
7284
7285               /* Any properly sized operand disambiguates the insn.  */
7286               if (i.types[op].bitfield.xmmword
7287                   || i.types[op].bitfield.ymmword
7288                   || i.types[op].bitfield.zmmword)
7289                 {
7290                   suffixes &= ~(7 << 6);
7291                   evex = 0;
7292                   break;
7293                 }
7294
7295               if ((i.flags[op] & Operand_Mem)
7296                   && i.tm.operand_types[op].bitfield.unspecified)
7297                 {
7298                   if (i.tm.operand_types[op].bitfield.xmmword)
7299                     suffixes |= 1 << 6;
7300                   if (i.tm.operand_types[op].bitfield.ymmword)
7301                     suffixes |= 1 << 7;
7302                   if (i.tm.operand_types[op].bitfield.zmmword)
7303                     suffixes |= 1 << 8;
7304                   if (is_evex_encoding (&i.tm))
7305                     evex = EVEX512;
7306                 }
7307             }
7308         }
7309
7310       /* Are multiple suffixes / operand sizes allowed?  */
7311       if (suffixes & (suffixes - 1))
7312         {
7313           if (intel_syntax
7314               && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7315                   || operand_check == check_error))
7316             {
7317               as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
7318               return 0;
7319             }
7320           if (operand_check == check_error)
7321             {
7322               as_bad (_("no instruction mnemonic suffix given and "
7323                         "no register operands; can't size `%s'"), i.tm.name);
7324               return 0;
7325             }
7326           if (operand_check == check_warning)
7327             as_warn (_("%s; using default for `%s'"),
7328                        intel_syntax
7329                        ? _("ambiguous operand size")
7330                        : _("no instruction mnemonic suffix given and "
7331                            "no register operands"),
7332                        i.tm.name);
7333
7334           if (i.tm.opcode_modifier.floatmf)
7335             i.suffix = SHORT_MNEM_SUFFIX;
7336           else if (is_movx)
7337             /* handled below */;
7338           else if (evex)
7339             i.tm.opcode_modifier.evex = evex;
7340           else if (flag_code == CODE_16BIT)
7341             i.suffix = WORD_MNEM_SUFFIX;
7342           else if (!i.tm.opcode_modifier.no_lsuf)
7343             i.suffix = LONG_MNEM_SUFFIX;
7344           else
7345             i.suffix = QWORD_MNEM_SUFFIX;
7346         }
7347     }
7348
7349   if (is_movx)
7350     {
7351       /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7352          In AT&T syntax, if there is no suffix (warned about above), the default
7353          will be byte extension.  */
7354       if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7355         i.tm.base_opcode |= 1;
7356
7357       /* For further processing, the suffix should represent the destination
7358          (register).  This is already the case when one was used with
7359          mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7360          no suffix to begin with.  */
7361       if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7362         {
7363           if (i.types[1].bitfield.word)
7364             i.suffix = WORD_MNEM_SUFFIX;
7365           else if (i.types[1].bitfield.qword)
7366             i.suffix = QWORD_MNEM_SUFFIX;
7367           else
7368             i.suffix = LONG_MNEM_SUFFIX;
7369
7370           i.tm.opcode_modifier.w = 0;
7371         }
7372     }
7373
7374   if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7375     i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7376                    != (i.tm.operand_types[1].bitfield.class == Reg);
7377
7378   /* Change the opcode based on the operand size given by i.suffix.  */
7379   switch (i.suffix)
7380     {
7381     /* Size floating point instruction.  */
7382     case LONG_MNEM_SUFFIX:
7383       if (i.tm.opcode_modifier.floatmf)
7384         {
7385           i.tm.base_opcode ^= 4;
7386           break;
7387         }
7388     /* fall through */
7389     case WORD_MNEM_SUFFIX:
7390     case QWORD_MNEM_SUFFIX:
7391       /* It's not a byte, select word/dword operation.  */
7392       if (i.tm.opcode_modifier.w)
7393         {
7394           if (i.short_form)
7395             i.tm.base_opcode |= 8;
7396           else
7397             i.tm.base_opcode |= 1;
7398         }
7399     /* fall through */
7400     case SHORT_MNEM_SUFFIX:
7401       /* Now select between word & dword operations via the operand
7402          size prefix, except for instructions that will ignore this
7403          prefix anyway.  */
7404       if (i.suffix != QWORD_MNEM_SUFFIX
7405           && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7406           && !i.tm.opcode_modifier.floatmf
7407           && !is_any_vex_encoding (&i.tm)
7408           && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7409               || (flag_code == CODE_64BIT
7410                   && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7411         {
7412           unsigned int prefix = DATA_PREFIX_OPCODE;
7413
7414           if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7415             prefix = ADDR_PREFIX_OPCODE;
7416
7417           if (!add_prefix (prefix))
7418             return 0;
7419         }
7420
7421       /* Set mode64 for an operand.  */
7422       if (i.suffix == QWORD_MNEM_SUFFIX
7423           && flag_code == CODE_64BIT
7424           && !i.tm.opcode_modifier.norex64
7425           && !i.tm.opcode_modifier.vexw
7426           /* Special case for xchg %rax,%rax.  It is NOP and doesn't
7427              need rex64. */
7428           && ! (i.operands == 2
7429                 && i.tm.base_opcode == 0x90
7430                 && i.tm.extension_opcode == None
7431                 && i.types[0].bitfield.instance == Accum
7432                 && i.types[0].bitfield.qword
7433                 && i.types[1].bitfield.instance == Accum
7434                 && i.types[1].bitfield.qword))
7435         i.rex |= REX_W;
7436
7437       break;
7438
7439     case 0:
7440       /* Select word/dword/qword operation with explicit data sizing prefix
7441          when there are no suitable register operands.  */
7442       if (i.tm.opcode_modifier.w
7443           && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7444           && (!i.reg_operands
7445               || (i.reg_operands == 1
7446                       /* ShiftCount */
7447                   && (i.tm.operand_types[0].bitfield.instance == RegC
7448                       /* InOutPortReg */
7449                       || i.tm.operand_types[0].bitfield.instance == RegD
7450                       || i.tm.operand_types[1].bitfield.instance == RegD
7451                       /* CRC32 */
7452                       || is_crc32))))
7453         i.tm.base_opcode |= 1;
7454       break;
7455     }
7456
7457   if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
7458     {
7459       gas_assert (!i.suffix);
7460       gas_assert (i.reg_operands);
7461
7462       if (i.tm.operand_types[0].bitfield.instance == Accum
7463           || i.operands == 1)
7464         {
7465           /* The address size override prefix changes the size of the
7466              first operand.  */
7467           if (flag_code == CODE_64BIT
7468               && i.op[0].regs->reg_type.bitfield.word)
7469             {
7470               as_bad (_("16-bit addressing unavailable for `%s'"),
7471                       i.tm.name);
7472               return 0;
7473             }
7474
7475           if ((flag_code == CODE_32BIT
7476                ? i.op[0].regs->reg_type.bitfield.word
7477                : i.op[0].regs->reg_type.bitfield.dword)
7478               && !add_prefix (ADDR_PREFIX_OPCODE))
7479             return 0;
7480         }
7481       else
7482         {
7483           /* Check invalid register operand when the address size override
7484              prefix changes the size of register operands.  */
7485           unsigned int op;
7486           enum { need_word, need_dword, need_qword } need;
7487
7488           /* Check the register operand for the address size prefix if
7489              the memory operand has no real registers, like symbol, DISP
7490              or bogus (x32-only) symbol(%rip) when symbol(%eip) is meant.  */
7491           if (i.mem_operands == 1
7492               && i.reg_operands == 1
7493               && i.operands == 2
7494               && i.types[1].bitfield.class == Reg
7495               && (flag_code == CODE_32BIT
7496                   ? i.op[1].regs->reg_type.bitfield.word
7497                   : i.op[1].regs->reg_type.bitfield.dword)
7498               && ((i.base_reg == NULL && i.index_reg == NULL)
7499 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7500                   || (x86_elf_abi == X86_64_X32_ABI
7501                       && i.base_reg
7502                       && i.base_reg->reg_num == RegIP
7503                       && i.base_reg->reg_type.bitfield.qword))
7504 #else
7505                   || 0)
7506 #endif
7507               && !add_prefix (ADDR_PREFIX_OPCODE))
7508             return 0;
7509
7510           if (flag_code == CODE_32BIT)
7511             need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7512           else if (i.prefix[ADDR_PREFIX])
7513             need = need_dword;
7514           else
7515             need = flag_code == CODE_64BIT ? need_qword : need_word;
7516
7517           for (op = 0; op < i.operands; op++)
7518             {
7519               if (i.types[op].bitfield.class != Reg)
7520                 continue;
7521
7522               switch (need)
7523                 {
7524                 case need_word:
7525                   if (i.op[op].regs->reg_type.bitfield.word)
7526                     continue;
7527                   break;
7528                 case need_dword:
7529                   if (i.op[op].regs->reg_type.bitfield.dword)
7530                     continue;
7531                   break;
7532                 case need_qword:
7533                   if (i.op[op].regs->reg_type.bitfield.qword)
7534                     continue;
7535                   break;
7536                 }
7537
7538               as_bad (_("invalid register operand size for `%s'"),
7539                       i.tm.name);
7540               return 0;
7541             }
7542         }
7543     }
7544
7545   return 1;
7546 }
7547
7548 static int
7549 check_byte_reg (void)
7550 {
7551   int op;
7552
7553   for (op = i.operands; --op >= 0;)
7554     {
7555       /* Skip non-register operands. */
7556       if (i.types[op].bitfield.class != Reg)
7557         continue;
7558
7559       /* If this is an eight bit register, it's OK.  If it's the 16 or
7560          32 bit version of an eight bit register, we will just use the
7561          low portion, and that's OK too.  */
7562       if (i.types[op].bitfield.byte)
7563         continue;
7564
7565       /* I/O port address operands are OK too.  */
7566       if (i.tm.operand_types[op].bitfield.instance == RegD
7567           && i.tm.operand_types[op].bitfield.word)
7568         continue;
7569
7570       /* crc32 only wants its source operand checked here.  */
7571       if (i.tm.base_opcode == 0xf0
7572           && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7573           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2
7574           && op != 0)
7575         continue;
7576
7577       /* Any other register is bad.  */
7578       as_bad (_("`%s%s' not allowed with `%s%c'"),
7579               register_prefix, i.op[op].regs->reg_name,
7580               i.tm.name, i.suffix);
7581       return 0;
7582     }
7583   return 1;
7584 }
7585
7586 static int
7587 check_long_reg (void)
7588 {
7589   int op;
7590
7591   for (op = i.operands; --op >= 0;)
7592     /* Skip non-register operands. */
7593     if (i.types[op].bitfield.class != Reg)
7594       continue;
7595     /* Reject eight bit registers, except where the template requires
7596        them. (eg. movzb)  */
7597     else if (i.types[op].bitfield.byte
7598              && (i.tm.operand_types[op].bitfield.class == Reg
7599                  || i.tm.operand_types[op].bitfield.instance == Accum)
7600              && (i.tm.operand_types[op].bitfield.word
7601                  || i.tm.operand_types[op].bitfield.dword))
7602       {
7603         as_bad (_("`%s%s' not allowed with `%s%c'"),
7604                 register_prefix,
7605                 i.op[op].regs->reg_name,
7606                 i.tm.name,
7607                 i.suffix);
7608         return 0;
7609       }
7610     /* Error if the e prefix on a general reg is missing.  */
7611     else if (i.types[op].bitfield.word
7612              && (i.tm.operand_types[op].bitfield.class == Reg
7613                  || i.tm.operand_types[op].bitfield.instance == Accum)
7614              && i.tm.operand_types[op].bitfield.dword)
7615       {
7616         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7617                 register_prefix, i.op[op].regs->reg_name,
7618                 i.suffix);
7619         return 0;
7620       }
7621     /* Warn if the r prefix on a general reg is present.  */
7622     else if (i.types[op].bitfield.qword
7623              && (i.tm.operand_types[op].bitfield.class == Reg
7624                  || i.tm.operand_types[op].bitfield.instance == Accum)
7625              && i.tm.operand_types[op].bitfield.dword)
7626       {
7627         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7628                 register_prefix, i.op[op].regs->reg_name, i.suffix);
7629         return 0;
7630       }
7631   return 1;
7632 }
7633
7634 static int
7635 check_qword_reg (void)
7636 {
7637   int op;
7638
7639   for (op = i.operands; --op >= 0; )
7640     /* Skip non-register operands. */
7641     if (i.types[op].bitfield.class != Reg)
7642       continue;
7643     /* Reject eight bit registers, except where the template requires
7644        them. (eg. movzb)  */
7645     else if (i.types[op].bitfield.byte
7646              && (i.tm.operand_types[op].bitfield.class == Reg
7647                  || i.tm.operand_types[op].bitfield.instance == Accum)
7648              && (i.tm.operand_types[op].bitfield.word
7649                  || i.tm.operand_types[op].bitfield.dword))
7650       {
7651         as_bad (_("`%s%s' not allowed with `%s%c'"),
7652                 register_prefix,
7653                 i.op[op].regs->reg_name,
7654                 i.tm.name,
7655                 i.suffix);
7656         return 0;
7657       }
7658     /* Warn if the r prefix on a general reg is missing.  */
7659     else if ((i.types[op].bitfield.word
7660               || i.types[op].bitfield.dword)
7661              && (i.tm.operand_types[op].bitfield.class == Reg
7662                  || i.tm.operand_types[op].bitfield.instance == Accum)
7663              && i.tm.operand_types[op].bitfield.qword)
7664       {
7665         /* Prohibit these changes in the 64bit mode, since the
7666            lowering is more complicated.  */
7667         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7668                 register_prefix, i.op[op].regs->reg_name, i.suffix);
7669         return 0;
7670       }
7671   return 1;
7672 }
7673
7674 static int
7675 check_word_reg (void)
7676 {
7677   int op;
7678   for (op = i.operands; --op >= 0;)
7679     /* Skip non-register operands. */
7680     if (i.types[op].bitfield.class != Reg)
7681       continue;
7682     /* Reject eight bit registers, except where the template requires
7683        them. (eg. movzb)  */
7684     else if (i.types[op].bitfield.byte
7685              && (i.tm.operand_types[op].bitfield.class == Reg
7686                  || i.tm.operand_types[op].bitfield.instance == Accum)
7687              && (i.tm.operand_types[op].bitfield.word
7688                  || i.tm.operand_types[op].bitfield.dword))
7689       {
7690         as_bad (_("`%s%s' not allowed with `%s%c'"),
7691                 register_prefix,
7692                 i.op[op].regs->reg_name,
7693                 i.tm.name,
7694                 i.suffix);
7695         return 0;
7696       }
7697     /* Error if the e or r prefix on a general reg is present.  */
7698     else if ((i.types[op].bitfield.dword
7699                  || i.types[op].bitfield.qword)
7700              && (i.tm.operand_types[op].bitfield.class == Reg
7701                  || i.tm.operand_types[op].bitfield.instance == Accum)
7702              && i.tm.operand_types[op].bitfield.word)
7703       {
7704         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7705                 register_prefix, i.op[op].regs->reg_name,
7706                 i.suffix);
7707         return 0;
7708       }
7709   return 1;
7710 }
7711
7712 static int
7713 update_imm (unsigned int j)
7714 {
7715   i386_operand_type overlap = i.types[j];
7716   if (overlap.bitfield.imm8
7717       + overlap.bitfield.imm8s
7718       + overlap.bitfield.imm16
7719       + overlap.bitfield.imm32
7720       + overlap.bitfield.imm32s
7721       + overlap.bitfield.imm64 > 1)
7722     {
7723       if (i.suffix)
7724         {
7725           i386_operand_type temp;
7726
7727           operand_type_set (&temp, 0);
7728           if (i.suffix == BYTE_MNEM_SUFFIX)
7729             {
7730               temp.bitfield.imm8 = overlap.bitfield.imm8;
7731               temp.bitfield.imm8s = overlap.bitfield.imm8s;
7732             }
7733           else if (i.suffix == WORD_MNEM_SUFFIX)
7734             temp.bitfield.imm16 = overlap.bitfield.imm16;
7735           else if (i.suffix == QWORD_MNEM_SUFFIX)
7736             {
7737               temp.bitfield.imm64 = overlap.bitfield.imm64;
7738               temp.bitfield.imm32s = overlap.bitfield.imm32s;
7739             }
7740           else
7741             temp.bitfield.imm32 = overlap.bitfield.imm32;
7742           overlap = temp;
7743         }
7744       else if (operand_type_equal (&overlap, &imm16_32_32s)
7745                || operand_type_equal (&overlap, &imm16_32)
7746                || operand_type_equal (&overlap, &imm16_32s))
7747         {
7748           if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
7749             overlap = imm16;
7750           else
7751             overlap = imm32s;
7752         }
7753       else if (i.prefix[REX_PREFIX] & REX_W)
7754         overlap = operand_type_and (overlap, imm32s);
7755       else if (i.prefix[DATA_PREFIX])
7756         overlap = operand_type_and (overlap,
7757                                     flag_code != CODE_16BIT ? imm16 : imm32);
7758       if (overlap.bitfield.imm8
7759           + overlap.bitfield.imm8s
7760           + overlap.bitfield.imm16
7761           + overlap.bitfield.imm32
7762           + overlap.bitfield.imm32s
7763           + overlap.bitfield.imm64 != 1)
7764         {
7765           as_bad (_("no instruction mnemonic suffix given; "
7766                     "can't determine immediate size"));
7767           return 0;
7768         }
7769     }
7770   i.types[j] = overlap;
7771
7772   return 1;
7773 }
7774
7775 static int
7776 finalize_imm (void)
7777 {
7778   unsigned int j, n;
7779
7780   /* Update the first 2 immediate operands.  */
7781   n = i.operands > 2 ? 2 : i.operands;
7782   if (n)
7783     {
7784       for (j = 0; j < n; j++)
7785         if (update_imm (j) == 0)
7786           return 0;
7787
7788       /* The 3rd operand can't be immediate operand.  */
7789       gas_assert (operand_type_check (i.types[2], imm) == 0);
7790     }
7791
7792   return 1;
7793 }
7794
7795 static int
7796 process_operands (void)
7797 {
7798   /* Default segment register this instruction will use for memory
7799      accesses.  0 means unknown.  This is only for optimizing out
7800      unnecessary segment overrides.  */
7801   const reg_entry *default_seg = NULL;
7802
7803   if (i.tm.opcode_modifier.sse2avx)
7804     {
7805       /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
7806          need converting.  */
7807       i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
7808       i.prefix[REX_PREFIX] = 0;
7809       i.rex_encoding = 0;
7810     }
7811   /* ImmExt should be processed after SSE2AVX.  */
7812   else if (i.tm.opcode_modifier.immext)
7813     process_immext ();
7814
7815   if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
7816     {
7817       unsigned int dupl = i.operands;
7818       unsigned int dest = dupl - 1;
7819       unsigned int j;
7820
7821       /* The destination must be an xmm register.  */
7822       gas_assert (i.reg_operands
7823                   && MAX_OPERANDS > dupl
7824                   && operand_type_equal (&i.types[dest], &regxmm));
7825
7826       if (i.tm.operand_types[0].bitfield.instance == Accum
7827           && i.tm.operand_types[0].bitfield.xmmword)
7828         {
7829           if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
7830             {
7831               /* Keep xmm0 for instructions with VEX prefix and 3
7832                  sources.  */
7833               i.tm.operand_types[0].bitfield.instance = InstanceNone;
7834               i.tm.operand_types[0].bitfield.class = RegSIMD;
7835               goto duplicate;
7836             }
7837           else
7838             {
7839               /* We remove the first xmm0 and keep the number of
7840                  operands unchanged, which in fact duplicates the
7841                  destination.  */
7842               for (j = 1; j < i.operands; j++)
7843                 {
7844                   i.op[j - 1] = i.op[j];
7845                   i.types[j - 1] = i.types[j];
7846                   i.tm.operand_types[j - 1] = i.tm.operand_types[j];
7847                   i.flags[j - 1] = i.flags[j];
7848                 }
7849             }
7850         }
7851       else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
7852         {
7853           gas_assert ((MAX_OPERANDS - 1) > dupl
7854                       && (i.tm.opcode_modifier.vexsources
7855                           == VEX3SOURCES));
7856
7857           /* Add the implicit xmm0 for instructions with VEX prefix
7858              and 3 sources.  */
7859           for (j = i.operands; j > 0; j--)
7860             {
7861               i.op[j] = i.op[j - 1];
7862               i.types[j] = i.types[j - 1];
7863               i.tm.operand_types[j] = i.tm.operand_types[j - 1];
7864               i.flags[j] = i.flags[j - 1];
7865             }
7866           i.op[0].regs
7867             = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
7868           i.types[0] = regxmm;
7869           i.tm.operand_types[0] = regxmm;
7870
7871           i.operands += 2;
7872           i.reg_operands += 2;
7873           i.tm.operands += 2;
7874
7875           dupl++;
7876           dest++;
7877           i.op[dupl] = i.op[dest];
7878           i.types[dupl] = i.types[dest];
7879           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7880           i.flags[dupl] = i.flags[dest];
7881         }
7882       else
7883         {
7884         duplicate:
7885           i.operands++;
7886           i.reg_operands++;
7887           i.tm.operands++;
7888
7889           i.op[dupl] = i.op[dest];
7890           i.types[dupl] = i.types[dest];
7891           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7892           i.flags[dupl] = i.flags[dest];
7893         }
7894
7895        if (i.tm.opcode_modifier.immext)
7896          process_immext ();
7897     }
7898   else if (i.tm.operand_types[0].bitfield.instance == Accum
7899            && i.tm.operand_types[0].bitfield.xmmword)
7900     {
7901       unsigned int j;
7902
7903       for (j = 1; j < i.operands; j++)
7904         {
7905           i.op[j - 1] = i.op[j];
7906           i.types[j - 1] = i.types[j];
7907
7908           /* We need to adjust fields in i.tm since they are used by
7909              build_modrm_byte.  */
7910           i.tm.operand_types [j - 1] = i.tm.operand_types [j];
7911
7912           i.flags[j - 1] = i.flags[j];
7913         }
7914
7915       i.operands--;
7916       i.reg_operands--;
7917       i.tm.operands--;
7918     }
7919   else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_QUAD_GROUP)
7920     {
7921       unsigned int regnum, first_reg_in_group, last_reg_in_group;
7922
7923       /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
7924       gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
7925       regnum = register_number (i.op[1].regs);
7926       first_reg_in_group = regnum & ~3;
7927       last_reg_in_group = first_reg_in_group + 3;
7928       if (regnum != first_reg_in_group)
7929         as_warn (_("source register `%s%s' implicitly denotes"
7930                    " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
7931                  register_prefix, i.op[1].regs->reg_name,
7932                  register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
7933                  register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
7934                  i.tm.name);
7935     }
7936   else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
7937     {
7938       /* The imul $imm, %reg instruction is converted into
7939          imul $imm, %reg, %reg, and the clr %reg instruction
7940          is converted into xor %reg, %reg.  */
7941
7942       unsigned int first_reg_op;
7943
7944       if (operand_type_check (i.types[0], reg))
7945         first_reg_op = 0;
7946       else
7947         first_reg_op = 1;
7948       /* Pretend we saw the extra register operand.  */
7949       gas_assert (i.reg_operands == 1
7950                   && i.op[first_reg_op + 1].regs == 0);
7951       i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
7952       i.types[first_reg_op + 1] = i.types[first_reg_op];
7953       i.operands++;
7954       i.reg_operands++;
7955     }
7956
7957   if (i.tm.opcode_modifier.modrm)
7958     {
7959       /* The opcode is completed (modulo i.tm.extension_opcode which
7960          must be put into the modrm byte).  Now, we make the modrm and
7961          index base bytes based on all the info we've collected.  */
7962
7963       default_seg = build_modrm_byte ();
7964     }
7965   else if (i.types[0].bitfield.class == SReg)
7966     {
7967       if (flag_code != CODE_64BIT
7968           ? i.tm.base_opcode == POP_SEG_SHORT
7969             && i.op[0].regs->reg_num == 1
7970           : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
7971             && i.op[0].regs->reg_num < 4)
7972         {
7973           as_bad (_("you can't `%s %s%s'"),
7974                   i.tm.name, register_prefix, i.op[0].regs->reg_name);
7975           return 0;
7976         }
7977       if (i.op[0].regs->reg_num > 3
7978           && i.tm.opcode_modifier.opcodespace == SPACE_BASE )
7979         {
7980           i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
7981           i.tm.opcode_modifier.opcodespace = SPACE_0F;
7982         }
7983       i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
7984     }
7985   else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
7986            && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
7987     {
7988       default_seg = reg_ds;
7989     }
7990   else if (i.tm.opcode_modifier.isstring)
7991     {
7992       /* For the string instructions that allow a segment override
7993          on one of their operands, the default segment is ds.  */
7994       default_seg = reg_ds;
7995     }
7996   else if (i.short_form)
7997     {
7998       /* The register or float register operand is in operand
7999          0 or 1.  */
8000       unsigned int op = i.tm.operand_types[0].bitfield.class != Reg;
8001
8002       /* Register goes in low 3 bits of opcode.  */
8003       i.tm.base_opcode |= i.op[op].regs->reg_num;
8004       if ((i.op[op].regs->reg_flags & RegRex) != 0)
8005         i.rex |= REX_B;
8006       if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
8007         {
8008           /* Warn about some common errors, but press on regardless.
8009              The first case can be generated by gcc (<= 2.8.1).  */
8010           if (i.operands == 2)
8011             {
8012               /* Reversed arguments on faddp, fsubp, etc.  */
8013               as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
8014                        register_prefix, i.op[!intel_syntax].regs->reg_name,
8015                        register_prefix, i.op[intel_syntax].regs->reg_name);
8016             }
8017           else
8018             {
8019               /* Extraneous `l' suffix on fp insn.  */
8020               as_warn (_("translating to `%s %s%s'"), i.tm.name,
8021                        register_prefix, i.op[0].regs->reg_name);
8022             }
8023         }
8024     }
8025
8026   if ((i.seg[0] || i.prefix[SEG_PREFIX])
8027       && i.tm.base_opcode == 0x8d /* lea */
8028       && i.tm.opcode_modifier.opcodespace == SPACE_BASE
8029       && !is_any_vex_encoding(&i.tm))
8030     {
8031       if (!quiet_warnings)
8032         as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
8033       if (optimize)
8034         {
8035           i.seg[0] = NULL;
8036           i.prefix[SEG_PREFIX] = 0;
8037         }
8038     }
8039
8040   /* If a segment was explicitly specified, and the specified segment
8041      is neither the default nor the one already recorded from a prefix,
8042      use an opcode prefix to select it.  If we never figured out what
8043      the default segment is, then default_seg will be zero at this
8044      point, and the specified segment prefix will always be used.  */
8045   if (i.seg[0]
8046       && i.seg[0] != default_seg
8047       && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
8048     {
8049       if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
8050         return 0;
8051     }
8052   return 1;
8053 }
8054
8055 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
8056                                  bool do_sse2avx)
8057 {
8058   if (r->reg_flags & RegRex)
8059     {
8060       if (i.rex & rex_bit)
8061         as_bad (_("same type of prefix used twice"));
8062       i.rex |= rex_bit;
8063     }
8064   else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
8065     {
8066       gas_assert (i.vex.register_specifier == r);
8067       i.vex.register_specifier += 8;
8068     }
8069
8070   if (r->reg_flags & RegVRex)
8071     i.vrex |= rex_bit;
8072 }
8073
8074 static const reg_entry *
8075 build_modrm_byte (void)
8076 {
8077   const reg_entry *default_seg = NULL;
8078   unsigned int source, dest;
8079   int vex_3_sources;
8080
8081   vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
8082   if (vex_3_sources)
8083     {
8084       unsigned int nds, reg_slot;
8085       expressionS *exp;
8086
8087       dest = i.operands - 1;
8088       nds = dest - 1;
8089
8090       /* There are 2 kinds of instructions:
8091          1. 5 operands: 4 register operands or 3 register operands
8092          plus 1 memory operand plus one Imm4 operand, VexXDS, and
8093          VexW0 or VexW1.  The destination must be either XMM, YMM or
8094          ZMM register.
8095          2. 4 operands: 4 register operands or 3 register operands
8096          plus 1 memory operand, with VexXDS.  */
8097       gas_assert ((i.reg_operands == 4
8098                    || (i.reg_operands == 3 && i.mem_operands == 1))
8099                   && i.tm.opcode_modifier.vexvvvv == VEXXDS
8100                   && i.tm.opcode_modifier.vexw
8101                   && i.tm.operand_types[dest].bitfield.class == RegSIMD);
8102
8103       /* If VexW1 is set, the first non-immediate operand is the source and
8104          the second non-immediate one is encoded in the immediate operand.  */
8105       if (i.tm.opcode_modifier.vexw == VEXW1)
8106         {
8107           source = i.imm_operands;
8108           reg_slot = i.imm_operands + 1;
8109         }
8110       else
8111         {
8112           source = i.imm_operands + 1;
8113           reg_slot = i.imm_operands;
8114         }
8115
8116       if (i.imm_operands == 0)
8117         {
8118           /* When there is no immediate operand, generate an 8bit
8119              immediate operand to encode the first operand.  */
8120           exp = &im_expressions[i.imm_operands++];
8121           i.op[i.operands].imms = exp;
8122           i.types[i.operands].bitfield.imm8 = 1;
8123           i.operands++;
8124
8125           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8126           exp->X_op = O_constant;
8127           exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
8128           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8129         }
8130       else
8131         {
8132           gas_assert (i.imm_operands == 1);
8133           gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
8134           gas_assert (!i.tm.opcode_modifier.immext);
8135
8136           /* Turn on Imm8 again so that output_imm will generate it.  */
8137           i.types[0].bitfield.imm8 = 1;
8138
8139           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8140           i.op[0].imms->X_add_number
8141               |= register_number (i.op[reg_slot].regs) << 4;
8142           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8143         }
8144
8145       gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
8146       i.vex.register_specifier = i.op[nds].regs;
8147     }
8148   else
8149     source = dest = 0;
8150
8151   /* i.reg_operands MUST be the number of real register operands;
8152      implicit registers do not count.  If there are 3 register
8153      operands, it must be a instruction with VexNDS.  For a
8154      instruction with VexNDD, the destination register is encoded
8155      in VEX prefix.  If there are 4 register operands, it must be
8156      a instruction with VEX prefix and 3 sources.  */
8157   if (i.mem_operands == 0
8158       && ((i.reg_operands == 2
8159            && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
8160           || (i.reg_operands == 3
8161               && i.tm.opcode_modifier.vexvvvv == VEXXDS)
8162           || (i.reg_operands == 4 && vex_3_sources)))
8163     {
8164       switch (i.operands)
8165         {
8166         case 2:
8167           source = 0;
8168           break;
8169         case 3:
8170           /* When there are 3 operands, one of them may be immediate,
8171              which may be the first or the last operand.  Otherwise,
8172              the first operand must be shift count register (cl) or it
8173              is an instruction with VexNDS. */
8174           gas_assert (i.imm_operands == 1
8175                       || (i.imm_operands == 0
8176                           && (i.tm.opcode_modifier.vexvvvv == VEXXDS
8177                               || (i.types[0].bitfield.instance == RegC
8178                                   && i.types[0].bitfield.byte))));
8179           if (operand_type_check (i.types[0], imm)
8180               || (i.types[0].bitfield.instance == RegC
8181                   && i.types[0].bitfield.byte))
8182             source = 1;
8183           else
8184             source = 0;
8185           break;
8186         case 4:
8187           /* When there are 4 operands, the first two must be 8bit
8188              immediate operands. The source operand will be the 3rd
8189              one.
8190
8191              For instructions with VexNDS, if the first operand
8192              an imm8, the source operand is the 2nd one.  If the last
8193              operand is imm8, the source operand is the first one.  */
8194           gas_assert ((i.imm_operands == 2
8195                        && i.types[0].bitfield.imm8
8196                        && i.types[1].bitfield.imm8)
8197                       || (i.tm.opcode_modifier.vexvvvv == VEXXDS
8198                           && i.imm_operands == 1
8199                           && (i.types[0].bitfield.imm8
8200                               || i.types[i.operands - 1].bitfield.imm8)));
8201           if (i.imm_operands == 2)
8202             source = 2;
8203           else
8204             {
8205               if (i.types[0].bitfield.imm8)
8206                 source = 1;
8207               else
8208                 source = 0;
8209             }
8210           break;
8211         case 5:
8212           gas_assert (!is_evex_encoding (&i.tm));
8213           gas_assert (i.imm_operands == 1 && vex_3_sources);
8214           break;
8215         default:
8216           abort ();
8217         }
8218
8219       if (!vex_3_sources)
8220         {
8221           dest = source + 1;
8222
8223           if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8224             {
8225               /* For instructions with VexNDS, the register-only source
8226                  operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
8227                  register.  It is encoded in VEX prefix.  */
8228
8229               i386_operand_type op;
8230               unsigned int vvvv;
8231
8232               /* Swap two source operands if needed.  */
8233               if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES)
8234                 {
8235                   vvvv = source;
8236                   source = dest;
8237                 }
8238               else
8239                 vvvv = dest;
8240
8241               op = i.tm.operand_types[vvvv];
8242               if ((dest + 1) >= i.operands
8243                   || ((op.bitfield.class != Reg
8244                        || (!op.bitfield.dword && !op.bitfield.qword))
8245                       && op.bitfield.class != RegSIMD
8246                       && op.bitfield.class != RegMask))
8247                 abort ();
8248               i.vex.register_specifier = i.op[vvvv].regs;
8249               dest++;
8250             }
8251         }
8252
8253       i.rm.mode = 3;
8254       /* One of the register operands will be encoded in the i.rm.reg
8255          field, the other in the combined i.rm.mode and i.rm.regmem
8256          fields.  If no form of this instruction supports a memory
8257          destination operand, then we assume the source operand may
8258          sometimes be a memory operand and so we need to store the
8259          destination in the i.rm.reg field.  */
8260       if (!i.tm.opcode_modifier.regmem
8261           && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
8262         {
8263           i.rm.reg = i.op[dest].regs->reg_num;
8264           i.rm.regmem = i.op[source].regs->reg_num;
8265           set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8266           set_rex_vrex (i.op[source].regs, REX_B, false);
8267         }
8268       else
8269         {
8270           i.rm.reg = i.op[source].regs->reg_num;
8271           i.rm.regmem = i.op[dest].regs->reg_num;
8272           set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8273           set_rex_vrex (i.op[source].regs, REX_R, false);
8274         }
8275       if (flag_code != CODE_64BIT && (i.rex & REX_R))
8276         {
8277           if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
8278             abort ();
8279           i.rex &= ~REX_R;
8280           add_prefix (LOCK_PREFIX_OPCODE);
8281         }
8282     }
8283   else
8284     {                   /* If it's not 2 reg operands...  */
8285       unsigned int mem;
8286
8287       if (i.mem_operands)
8288         {
8289           unsigned int fake_zero_displacement = 0;
8290           unsigned int op;
8291
8292           for (op = 0; op < i.operands; op++)
8293             if (i.flags[op] & Operand_Mem)
8294               break;
8295           gas_assert (op < i.operands);
8296
8297           if (i.tm.opcode_modifier.sib)
8298             {
8299               /* The index register of VSIB shouldn't be RegIZ.  */
8300               if (i.tm.opcode_modifier.sib != SIBMEM
8301                   && i.index_reg->reg_num == RegIZ)
8302                 abort ();
8303
8304               i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8305               if (!i.base_reg)
8306                 {
8307                   i.sib.base = NO_BASE_REGISTER;
8308                   i.sib.scale = i.log2_scale_factor;
8309                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8310                   i.types[op].bitfield.disp32 = 1;
8311                 }
8312
8313               /* Since the mandatory SIB always has index register, so
8314                  the code logic remains unchanged. The non-mandatory SIB
8315                  without index register is allowed and will be handled
8316                  later.  */
8317               if (i.index_reg)
8318                 {
8319                   if (i.index_reg->reg_num == RegIZ)
8320                     i.sib.index = NO_INDEX_REGISTER;
8321                   else
8322                     i.sib.index = i.index_reg->reg_num;
8323                   set_rex_vrex (i.index_reg, REX_X, false);
8324                 }
8325             }
8326
8327           default_seg = reg_ds;
8328
8329           if (i.base_reg == 0)
8330             {
8331               i.rm.mode = 0;
8332               if (!i.disp_operands)
8333                 fake_zero_displacement = 1;
8334               if (i.index_reg == 0)
8335                 {
8336                   /* Both check for VSIB and mandatory non-vector SIB. */
8337                   gas_assert (!i.tm.opcode_modifier.sib
8338                               || i.tm.opcode_modifier.sib == SIBMEM);
8339                   /* Operand is just <disp>  */
8340                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8341                   if (flag_code == CODE_64BIT)
8342                     {
8343                       /* 64bit mode overwrites the 32bit absolute
8344                          addressing by RIP relative addressing and
8345                          absolute addressing is encoded by one of the
8346                          redundant SIB forms.  */
8347                       i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8348                       i.sib.base = NO_BASE_REGISTER;
8349                       i.sib.index = NO_INDEX_REGISTER;
8350                       i.types[op].bitfield.disp32 = 1;
8351                     }
8352                   else if ((flag_code == CODE_16BIT)
8353                            ^ (i.prefix[ADDR_PREFIX] != 0))
8354                     {
8355                       i.rm.regmem = NO_BASE_REGISTER_16;
8356                       i.types[op].bitfield.disp16 = 1;
8357                     }
8358                   else
8359                     {
8360                       i.rm.regmem = NO_BASE_REGISTER;
8361                       i.types[op].bitfield.disp32 = 1;
8362                     }
8363                 }
8364               else if (!i.tm.opcode_modifier.sib)
8365                 {
8366                   /* !i.base_reg && i.index_reg  */
8367                   if (i.index_reg->reg_num == RegIZ)
8368                     i.sib.index = NO_INDEX_REGISTER;
8369                   else
8370                     i.sib.index = i.index_reg->reg_num;
8371                   i.sib.base = NO_BASE_REGISTER;
8372                   i.sib.scale = i.log2_scale_factor;
8373                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8374                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8375                   i.types[op].bitfield.disp32 = 1;
8376                   if ((i.index_reg->reg_flags & RegRex) != 0)
8377                     i.rex |= REX_X;
8378                 }
8379             }
8380           /* RIP addressing for 64bit mode.  */
8381           else if (i.base_reg->reg_num == RegIP)
8382             {
8383               gas_assert (!i.tm.opcode_modifier.sib);
8384               i.rm.regmem = NO_BASE_REGISTER;
8385               i.types[op].bitfield.disp8 = 0;
8386               i.types[op].bitfield.disp16 = 0;
8387               i.types[op].bitfield.disp32 = 1;
8388               i.types[op].bitfield.disp64 = 0;
8389               i.flags[op] |= Operand_PCrel;
8390               if (! i.disp_operands)
8391                 fake_zero_displacement = 1;
8392             }
8393           else if (i.base_reg->reg_type.bitfield.word)
8394             {
8395               gas_assert (!i.tm.opcode_modifier.sib);
8396               switch (i.base_reg->reg_num)
8397                 {
8398                 case 3: /* (%bx)  */
8399                   if (i.index_reg == 0)
8400                     i.rm.regmem = 7;
8401                   else /* (%bx,%si) -> 0, or (%bx,%di) -> 1  */
8402                     i.rm.regmem = i.index_reg->reg_num - 6;
8403                   break;
8404                 case 5: /* (%bp)  */
8405                   default_seg = reg_ss;
8406                   if (i.index_reg == 0)
8407                     {
8408                       i.rm.regmem = 6;
8409                       if (operand_type_check (i.types[op], disp) == 0)
8410                         {
8411                           /* fake (%bp) into 0(%bp)  */
8412                           if (i.disp_encoding == disp_encoding_16bit)
8413                             i.types[op].bitfield.disp16 = 1;
8414                           else
8415                             i.types[op].bitfield.disp8 = 1;
8416                           fake_zero_displacement = 1;
8417                         }
8418                     }
8419                   else /* (%bp,%si) -> 2, or (%bp,%di) -> 3  */
8420                     i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8421                   break;
8422                 default: /* (%si) -> 4 or (%di) -> 5  */
8423                   i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8424                 }
8425               if (!fake_zero_displacement
8426                   && !i.disp_operands
8427                   && i.disp_encoding)
8428                 {
8429                   fake_zero_displacement = 1;
8430                   if (i.disp_encoding == disp_encoding_8bit)
8431                     i.types[op].bitfield.disp8 = 1;
8432                   else
8433                     i.types[op].bitfield.disp16 = 1;
8434                 }
8435               i.rm.mode = mode_from_disp_size (i.types[op]);
8436             }
8437           else /* i.base_reg and 32/64 bit mode  */
8438             {
8439               if (operand_type_check (i.types[op], disp))
8440                 {
8441                   i.types[op].bitfield.disp16 = 0;
8442                   i.types[op].bitfield.disp64 = 0;
8443                   i.types[op].bitfield.disp32 = 1;
8444                 }
8445
8446               if (!i.tm.opcode_modifier.sib)
8447                 i.rm.regmem = i.base_reg->reg_num;
8448               if ((i.base_reg->reg_flags & RegRex) != 0)
8449                 i.rex |= REX_B;
8450               i.sib.base = i.base_reg->reg_num;
8451               /* x86-64 ignores REX prefix bit here to avoid decoder
8452                  complications.  */
8453               if (!(i.base_reg->reg_flags & RegRex)
8454                   && (i.base_reg->reg_num == EBP_REG_NUM
8455                    || i.base_reg->reg_num == ESP_REG_NUM))
8456                   default_seg = reg_ss;
8457               if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8458                 {
8459                   fake_zero_displacement = 1;
8460                   if (i.disp_encoding == disp_encoding_32bit)
8461                     i.types[op].bitfield.disp32 = 1;
8462                   else
8463                     i.types[op].bitfield.disp8 = 1;
8464                 }
8465               i.sib.scale = i.log2_scale_factor;
8466               if (i.index_reg == 0)
8467                 {
8468                   /* Only check for VSIB. */
8469                   gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8470                               && i.tm.opcode_modifier.sib != VECSIB256
8471                               && i.tm.opcode_modifier.sib != VECSIB512);
8472
8473                   /* <disp>(%esp) becomes two byte modrm with no index
8474                      register.  We've already stored the code for esp
8475                      in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8476                      Any base register besides %esp will not use the
8477                      extra modrm byte.  */
8478                   i.sib.index = NO_INDEX_REGISTER;
8479                 }
8480               else if (!i.tm.opcode_modifier.sib)
8481                 {
8482                   if (i.index_reg->reg_num == RegIZ)
8483                     i.sib.index = NO_INDEX_REGISTER;
8484                   else
8485                     i.sib.index = i.index_reg->reg_num;
8486                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8487                   if ((i.index_reg->reg_flags & RegRex) != 0)
8488                     i.rex |= REX_X;
8489                 }
8490
8491               if (i.disp_operands
8492                   && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8493                       || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8494                 i.rm.mode = 0;
8495               else
8496                 {
8497                   if (!fake_zero_displacement
8498                       && !i.disp_operands
8499                       && i.disp_encoding)
8500                     {
8501                       fake_zero_displacement = 1;
8502                       if (i.disp_encoding == disp_encoding_8bit)
8503                         i.types[op].bitfield.disp8 = 1;
8504                       else
8505                         i.types[op].bitfield.disp32 = 1;
8506                     }
8507                   i.rm.mode = mode_from_disp_size (i.types[op]);
8508                 }
8509             }
8510
8511           if (fake_zero_displacement)
8512             {
8513               /* Fakes a zero displacement assuming that i.types[op]
8514                  holds the correct displacement size.  */
8515               expressionS *exp;
8516
8517               gas_assert (i.op[op].disps == 0);
8518               exp = &disp_expressions[i.disp_operands++];
8519               i.op[op].disps = exp;
8520               exp->X_op = O_constant;
8521               exp->X_add_number = 0;
8522               exp->X_add_symbol = (symbolS *) 0;
8523               exp->X_op_symbol = (symbolS *) 0;
8524             }
8525
8526           mem = op;
8527         }
8528       else
8529         mem = ~0;
8530
8531       if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
8532         {
8533           if (operand_type_check (i.types[0], imm))
8534             i.vex.register_specifier = NULL;
8535           else
8536             {
8537               /* VEX.vvvv encodes one of the sources when the first
8538                  operand is not an immediate.  */
8539               if (i.tm.opcode_modifier.vexw == VEXW0)
8540                 i.vex.register_specifier = i.op[0].regs;
8541               else
8542                 i.vex.register_specifier = i.op[1].regs;
8543             }
8544
8545           /* Destination is a XMM register encoded in the ModRM.reg
8546              and VEX.R bit.  */
8547           i.rm.reg = i.op[2].regs->reg_num;
8548           if ((i.op[2].regs->reg_flags & RegRex) != 0)
8549             i.rex |= REX_R;
8550
8551           /* ModRM.rm and VEX.B encodes the other source.  */
8552           if (!i.mem_operands)
8553             {
8554               i.rm.mode = 3;
8555
8556               if (i.tm.opcode_modifier.vexw == VEXW0)
8557                 i.rm.regmem = i.op[1].regs->reg_num;
8558               else
8559                 i.rm.regmem = i.op[0].regs->reg_num;
8560
8561               if ((i.op[1].regs->reg_flags & RegRex) != 0)
8562                 i.rex |= REX_B;
8563             }
8564         }
8565       else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
8566         {
8567           i.vex.register_specifier = i.op[2].regs;
8568           if (!i.mem_operands)
8569             {
8570               i.rm.mode = 3;
8571               i.rm.regmem = i.op[1].regs->reg_num;
8572               if ((i.op[1].regs->reg_flags & RegRex) != 0)
8573                 i.rex |= REX_B;
8574             }
8575         }
8576       /* Fill in i.rm.reg or i.rm.regmem field with register operand
8577          (if any) based on i.tm.extension_opcode.  Again, we must be
8578          careful to make sure that segment/control/debug/test/MMX
8579          registers are coded into the i.rm.reg field.  */
8580       else if (i.reg_operands)
8581         {
8582           unsigned int op;
8583           unsigned int vex_reg = ~0;
8584
8585           for (op = 0; op < i.operands; op++)
8586             if (i.types[op].bitfield.class == Reg
8587                 || i.types[op].bitfield.class == RegBND
8588                 || i.types[op].bitfield.class == RegMask
8589                 || i.types[op].bitfield.class == SReg
8590                 || i.types[op].bitfield.class == RegCR
8591                 || i.types[op].bitfield.class == RegDR
8592                 || i.types[op].bitfield.class == RegTR
8593                 || i.types[op].bitfield.class == RegSIMD
8594                 || i.types[op].bitfield.class == RegMMX)
8595               break;
8596
8597           if (vex_3_sources)
8598             op = dest;
8599           else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8600             {
8601               /* For instructions with VexNDS, the register-only
8602                  source operand is encoded in VEX prefix. */
8603               gas_assert (mem != (unsigned int) ~0);
8604
8605               if (op > mem || i.tm.cpu_flags.bitfield.cpucmpccxadd)
8606                 {
8607                   vex_reg = op++;
8608                   gas_assert (op < i.operands);
8609                 }
8610               else
8611                 {
8612                   /* Check register-only source operand when two source
8613                      operands are swapped.  */
8614                   if (!i.tm.operand_types[op].bitfield.baseindex
8615                       && i.tm.operand_types[op + 1].bitfield.baseindex)
8616                     {
8617                       vex_reg = op;
8618                       op += 2;
8619                       gas_assert (mem == (vex_reg + 1)
8620                                   && op < i.operands);
8621                     }
8622                   else
8623                     {
8624                       vex_reg = op + 1;
8625                       gas_assert (vex_reg < i.operands);
8626                     }
8627                 }
8628             }
8629           else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
8630             {
8631               /* For instructions with VexNDD, the register destination
8632                  is encoded in VEX prefix.  */
8633               if (i.mem_operands == 0)
8634                 {
8635                   /* There is no memory operand.  */
8636                   gas_assert ((op + 2) == i.operands);
8637                   vex_reg = op + 1;
8638                 }
8639               else
8640                 {
8641                   /* There are only 2 non-immediate operands.  */
8642                   gas_assert (op < i.imm_operands + 2
8643                               && i.operands == i.imm_operands + 2);
8644                   vex_reg = i.imm_operands + 1;
8645                 }
8646             }
8647           else
8648             gas_assert (op < i.operands);
8649
8650           if (vex_reg != (unsigned int) ~0)
8651             {
8652               i386_operand_type *type = &i.tm.operand_types[vex_reg];
8653
8654               if ((type->bitfield.class != Reg
8655                    || (!type->bitfield.dword && !type->bitfield.qword))
8656                   && type->bitfield.class != RegSIMD
8657                   && type->bitfield.class != RegMask)
8658                 abort ();
8659
8660               i.vex.register_specifier = i.op[vex_reg].regs;
8661             }
8662
8663           /* Don't set OP operand twice.  */
8664           if (vex_reg != op)
8665             {
8666               /* If there is an extension opcode to put here, the
8667                  register number must be put into the regmem field.  */
8668               if (i.tm.extension_opcode != None)
8669                 {
8670                   i.rm.regmem = i.op[op].regs->reg_num;
8671                   set_rex_vrex (i.op[op].regs, REX_B,
8672                                 i.tm.opcode_modifier.sse2avx);
8673                 }
8674               else
8675                 {
8676                   i.rm.reg = i.op[op].regs->reg_num;
8677                   set_rex_vrex (i.op[op].regs, REX_R,
8678                                 i.tm.opcode_modifier.sse2avx);
8679                 }
8680             }
8681
8682           /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
8683              must set it to 3 to indicate this is a register operand
8684              in the regmem field.  */
8685           if (!i.mem_operands)
8686             i.rm.mode = 3;
8687         }
8688
8689       /* Fill in i.rm.reg field with extension opcode (if any).  */
8690       if (i.tm.extension_opcode != None)
8691         i.rm.reg = i.tm.extension_opcode;
8692     }
8693   return default_seg;
8694 }
8695
8696 static INLINE void
8697 frag_opcode_byte (unsigned char byte)
8698 {
8699   if (now_seg != absolute_section)
8700     FRAG_APPEND_1_CHAR (byte);
8701   else
8702     ++abs_section_offset;
8703 }
8704
8705 static unsigned int
8706 flip_code16 (unsigned int code16)
8707 {
8708   gas_assert (i.tm.operands == 1);
8709
8710   return !(i.prefix[REX_PREFIX] & REX_W)
8711          && (code16 ? i.tm.operand_types[0].bitfield.disp32
8712                     : i.tm.operand_types[0].bitfield.disp16)
8713          ? CODE16 : 0;
8714 }
8715
8716 static void
8717 output_branch (void)
8718 {
8719   char *p;
8720   int size;
8721   int code16;
8722   int prefix;
8723   relax_substateT subtype;
8724   symbolS *sym;
8725   offsetT off;
8726
8727   if (now_seg == absolute_section)
8728     {
8729       as_bad (_("relaxable branches not supported in absolute section"));
8730       return;
8731     }
8732
8733   code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8734   size = i.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
8735
8736   prefix = 0;
8737   if (i.prefix[DATA_PREFIX] != 0)
8738     {
8739       prefix = 1;
8740       i.prefixes -= 1;
8741       code16 ^= flip_code16(code16);
8742     }
8743   /* Pentium4 branch hints.  */
8744   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8745       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8746     {
8747       prefix++;
8748       i.prefixes--;
8749     }
8750   if (i.prefix[REX_PREFIX] != 0)
8751     {
8752       prefix++;
8753       i.prefixes--;
8754     }
8755
8756   /* BND prefixed jump.  */
8757   if (i.prefix[BND_PREFIX] != 0)
8758     {
8759       prefix++;
8760       i.prefixes--;
8761     }
8762
8763   if (i.prefixes != 0)
8764     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8765
8766   /* It's always a symbol;  End frag & setup for relax.
8767      Make sure there is enough room in this frag for the largest
8768      instruction we may generate in md_convert_frag.  This is 2
8769      bytes for the opcode and room for the prefix and largest
8770      displacement.  */
8771   frag_grow (prefix + 2 + 4);
8772   /* Prefix and 1 opcode byte go in fr_fix.  */
8773   p = frag_more (prefix + 1);
8774   if (i.prefix[DATA_PREFIX] != 0)
8775     *p++ = DATA_PREFIX_OPCODE;
8776   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8777       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8778     *p++ = i.prefix[SEG_PREFIX];
8779   if (i.prefix[BND_PREFIX] != 0)
8780     *p++ = BND_PREFIX_OPCODE;
8781   if (i.prefix[REX_PREFIX] != 0)
8782     *p++ = i.prefix[REX_PREFIX];
8783   *p = i.tm.base_opcode;
8784
8785   if ((unsigned char) *p == JUMP_PC_RELATIVE)
8786     subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8787   else if (cpu_arch_flags.bitfield.cpui386)
8788     subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8789   else
8790     subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8791   subtype |= code16;
8792
8793   sym = i.op[0].disps->X_add_symbol;
8794   off = i.op[0].disps->X_add_number;
8795
8796   if (i.op[0].disps->X_op != O_constant
8797       && i.op[0].disps->X_op != O_symbol)
8798     {
8799       /* Handle complex expressions.  */
8800       sym = make_expr_symbol (i.op[0].disps);
8801       off = 0;
8802     }
8803
8804   frag_now->tc_frag_data.code64 = flag_code == CODE_64BIT;
8805
8806   /* 1 possible extra opcode + 4 byte displacement go in var part.
8807      Pass reloc in fr_var.  */
8808   frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8809 }
8810
8811 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8812 /* Return TRUE iff PLT32 relocation should be used for branching to
8813    symbol S.  */
8814
8815 static bool
8816 need_plt32_p (symbolS *s)
8817 {
8818   /* PLT32 relocation is ELF only.  */
8819   if (!IS_ELF)
8820     return false;
8821
8822 #ifdef TE_SOLARIS
8823   /* Don't emit PLT32 relocation on Solaris: neither native linker nor
8824      krtld support it.  */
8825   return false;
8826 #endif
8827
8828   /* Since there is no need to prepare for PLT branch on x86-64, we
8829      can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
8830      be used as a marker for 32-bit PC-relative branches.  */
8831   if (!object_64bit)
8832     return false;
8833
8834   if (s == NULL)
8835     return false;
8836
8837   /* Weak or undefined symbol need PLT32 relocation.  */
8838   if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
8839     return true;
8840
8841   /* Non-global symbol doesn't need PLT32 relocation.  */
8842   if (! S_IS_EXTERNAL (s))
8843     return false;
8844
8845   /* Other global symbols need PLT32 relocation.  NB: Symbol with
8846      non-default visibilities are treated as normal global symbol
8847      so that PLT32 relocation can be used as a marker for 32-bit
8848      PC-relative branches.  It is useful for linker relaxation.  */
8849   return true;
8850 }
8851 #endif
8852
8853 static void
8854 output_jump (void)
8855 {
8856   char *p;
8857   int size;
8858   fixS *fixP;
8859   bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8860
8861   if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8862     {
8863       /* This is a loop or jecxz type instruction.  */
8864       size = 1;
8865       if (i.prefix[ADDR_PREFIX] != 0)
8866         {
8867           frag_opcode_byte (ADDR_PREFIX_OPCODE);
8868           i.prefixes -= 1;
8869         }
8870       /* Pentium4 branch hints.  */
8871       if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8872           || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8873         {
8874           frag_opcode_byte (i.prefix[SEG_PREFIX]);
8875           i.prefixes--;
8876         }
8877     }
8878   else
8879     {
8880       int code16;
8881
8882       code16 = 0;
8883       if (flag_code == CODE_16BIT)
8884         code16 = CODE16;
8885
8886       if (i.prefix[DATA_PREFIX] != 0)
8887         {
8888           frag_opcode_byte (DATA_PREFIX_OPCODE);
8889           i.prefixes -= 1;
8890           code16 ^= flip_code16(code16);
8891         }
8892
8893       size = 4;
8894       if (code16)
8895         size = 2;
8896     }
8897
8898   /* BND prefixed jump.  */
8899   if (i.prefix[BND_PREFIX] != 0)
8900     {
8901       frag_opcode_byte (i.prefix[BND_PREFIX]);
8902       i.prefixes -= 1;
8903     }
8904
8905   if (i.prefix[REX_PREFIX] != 0)
8906     {
8907       frag_opcode_byte (i.prefix[REX_PREFIX]);
8908       i.prefixes -= 1;
8909     }
8910
8911   if (i.prefixes != 0)
8912     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8913
8914   if (now_seg == absolute_section)
8915     {
8916       abs_section_offset += i.opcode_length + size;
8917       return;
8918     }
8919
8920   p = frag_more (i.opcode_length + size);
8921   switch (i.opcode_length)
8922     {
8923     case 2:
8924       *p++ = i.tm.base_opcode >> 8;
8925       /* Fall through.  */
8926     case 1:
8927       *p++ = i.tm.base_opcode;
8928       break;
8929     default:
8930       abort ();
8931     }
8932
8933 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8934   if (flag_code == CODE_64BIT && size == 4
8935       && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
8936       && need_plt32_p (i.op[0].disps->X_add_symbol))
8937     jump_reloc = BFD_RELOC_X86_64_PLT32;
8938 #endif
8939
8940   jump_reloc = reloc (size, 1, 1, jump_reloc);
8941
8942   fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8943                       i.op[0].disps, 1, jump_reloc);
8944
8945   /* All jumps handled here are signed, but don't unconditionally use a
8946      signed limit check for 32 and 16 bit jumps as we want to allow wrap
8947      around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
8948      respectively.  */
8949   switch (size)
8950     {
8951     case 1:
8952       fixP->fx_signed = 1;
8953       break;
8954
8955     case 2:
8956       if (i.tm.base_opcode == 0xc7f8)
8957         fixP->fx_signed = 1;
8958       break;
8959
8960     case 4:
8961       if (flag_code == CODE_64BIT)
8962         fixP->fx_signed = 1;
8963       break;
8964     }
8965 }
8966
8967 static void
8968 output_interseg_jump (void)
8969 {
8970   char *p;
8971   int size;
8972   int prefix;
8973   int code16;
8974
8975   code16 = 0;
8976   if (flag_code == CODE_16BIT)
8977     code16 = CODE16;
8978
8979   prefix = 0;
8980   if (i.prefix[DATA_PREFIX] != 0)
8981     {
8982       prefix = 1;
8983       i.prefixes -= 1;
8984       code16 ^= CODE16;
8985     }
8986
8987   gas_assert (!i.prefix[REX_PREFIX]);
8988
8989   size = 4;
8990   if (code16)
8991     size = 2;
8992
8993   if (i.prefixes != 0)
8994     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8995
8996   if (now_seg == absolute_section)
8997     {
8998       abs_section_offset += prefix + 1 + 2 + size;
8999       return;
9000     }
9001
9002   /* 1 opcode; 2 segment; offset  */
9003   p = frag_more (prefix + 1 + 2 + size);
9004
9005   if (i.prefix[DATA_PREFIX] != 0)
9006     *p++ = DATA_PREFIX_OPCODE;
9007
9008   if (i.prefix[REX_PREFIX] != 0)
9009     *p++ = i.prefix[REX_PREFIX];
9010
9011   *p++ = i.tm.base_opcode;
9012   if (i.op[1].imms->X_op == O_constant)
9013     {
9014       offsetT n = i.op[1].imms->X_add_number;
9015
9016       if (size == 2
9017           && !fits_in_unsigned_word (n)
9018           && !fits_in_signed_word (n))
9019         {
9020           as_bad (_("16-bit jump out of range"));
9021           return;
9022         }
9023       md_number_to_chars (p, n, size);
9024     }
9025   else
9026     fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9027                  i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
9028
9029   p += size;
9030   if (i.op[0].imms->X_op == O_constant)
9031     md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
9032   else
9033     fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
9034                  i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
9035 }
9036
9037 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9038 void
9039 x86_cleanup (void)
9040 {
9041   char *p;
9042   asection *seg = now_seg;
9043   subsegT subseg = now_subseg;
9044   asection *sec;
9045   unsigned int alignment, align_size_1;
9046   unsigned int isa_1_descsz, feature_2_descsz, descsz;
9047   unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
9048   unsigned int padding;
9049
9050   if (!IS_ELF || !x86_used_note)
9051     return;
9052
9053   x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
9054
9055   /* The .note.gnu.property section layout:
9056
9057      Field      Length          Contents
9058      ----       ----            ----
9059      n_namsz    4               4
9060      n_descsz   4               The note descriptor size
9061      n_type     4               NT_GNU_PROPERTY_TYPE_0
9062      n_name     4               "GNU"
9063      n_desc     n_descsz        The program property array
9064      ....       ....            ....
9065    */
9066
9067   /* Create the .note.gnu.property section.  */
9068   sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
9069   bfd_set_section_flags (sec,
9070                          (SEC_ALLOC
9071                           | SEC_LOAD
9072                           | SEC_DATA
9073                           | SEC_HAS_CONTENTS
9074                           | SEC_READONLY));
9075
9076   if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
9077     {
9078       align_size_1 = 7;
9079       alignment = 3;
9080     }
9081   else
9082     {
9083       align_size_1 = 3;
9084       alignment = 2;
9085     }
9086
9087   bfd_set_section_alignment (sec, alignment);
9088   elf_section_type (sec) = SHT_NOTE;
9089
9090   /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
9091                                   + 4-byte data  */
9092   isa_1_descsz_raw = 4 + 4 + 4;
9093   /* Align GNU_PROPERTY_X86_ISA_1_USED.  */
9094   isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
9095
9096   feature_2_descsz_raw = isa_1_descsz;
9097   /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
9098                                       + 4-byte data  */
9099   feature_2_descsz_raw += 4 + 4 + 4;
9100   /* Align GNU_PROPERTY_X86_FEATURE_2_USED.  */
9101   feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
9102                       & ~align_size_1);
9103
9104   descsz = feature_2_descsz;
9105   /* Section size: n_namsz + n_descsz + n_type + n_name + n_descsz.  */
9106   p = frag_more (4 + 4 + 4 + 4 + descsz);
9107
9108   /* Write n_namsz.  */
9109   md_number_to_chars (p, (valueT) 4, 4);
9110
9111   /* Write n_descsz.  */
9112   md_number_to_chars (p + 4, (valueT) descsz, 4);
9113
9114   /* Write n_type.  */
9115   md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
9116
9117   /* Write n_name.  */
9118   memcpy (p + 4 * 3, "GNU", 4);
9119
9120   /* Write 4-byte type.  */
9121   md_number_to_chars (p + 4 * 4,
9122                       (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
9123
9124   /* Write 4-byte data size.  */
9125   md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
9126
9127   /* Write 4-byte data.  */
9128   md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
9129
9130   /* Zero out paddings.  */
9131   padding = isa_1_descsz - isa_1_descsz_raw;
9132   if (padding)
9133     memset (p + 4 * 7, 0, padding);
9134
9135   /* Write 4-byte type.  */
9136   md_number_to_chars (p + isa_1_descsz + 4 * 4,
9137                       (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
9138
9139   /* Write 4-byte data size.  */
9140   md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
9141
9142   /* Write 4-byte data.  */
9143   md_number_to_chars (p + isa_1_descsz + 4 * 6,
9144                       (valueT) x86_feature_2_used, 4);
9145
9146   /* Zero out paddings.  */
9147   padding = feature_2_descsz - feature_2_descsz_raw;
9148   if (padding)
9149     memset (p + isa_1_descsz + 4 * 7, 0, padding);
9150
9151   /* We probably can't restore the current segment, for there likely
9152      isn't one yet...  */
9153   if (seg && subseg)
9154     subseg_set (seg, subseg);
9155 }
9156
9157 bool
9158 x86_support_sframe_p (void)
9159 {
9160   /* At this time, SFrame unwind is supported for AMD64 ABI only.  */
9161   return (x86_elf_abi == X86_64_ABI);
9162 }
9163
9164 bool
9165 x86_sframe_ra_tracking_p (void)
9166 {
9167   /* In AMD64, return address is always stored on the stack at a fixed offset
9168      from the CFA (provided via x86_sframe_cfa_ra_offset ()).
9169      Do not track explicitly via an SFrame Frame Row Entry.  */
9170   return false;
9171 }
9172
9173 offsetT
9174 x86_sframe_cfa_ra_offset (void)
9175 {
9176   gas_assert (x86_elf_abi == X86_64_ABI);
9177   return (offsetT) -8;
9178 }
9179
9180 unsigned char
9181 x86_sframe_get_abi_arch (void)
9182 {
9183   unsigned char sframe_abi_arch = 0;
9184
9185   if (x86_support_sframe_p ())
9186     {
9187       gas_assert (!target_big_endian);
9188       sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
9189     }
9190
9191   return sframe_abi_arch;
9192 }
9193
9194 #endif
9195
9196 static unsigned int
9197 encoding_length (const fragS *start_frag, offsetT start_off,
9198                  const char *frag_now_ptr)
9199 {
9200   unsigned int len = 0;
9201
9202   if (start_frag != frag_now)
9203     {
9204       const fragS *fr = start_frag;
9205
9206       do {
9207         len += fr->fr_fix;
9208         fr = fr->fr_next;
9209       } while (fr && fr != frag_now);
9210     }
9211
9212   return len - start_off + (frag_now_ptr - frag_now->fr_literal);
9213 }
9214
9215 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
9216    be macro-fused with conditional jumps.
9217    NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
9218    or is one of the following format:
9219
9220     cmp m, imm
9221     add m, imm
9222     sub m, imm
9223    test m, imm
9224     and m, imm
9225     inc m
9226     dec m
9227
9228    it is unfusible.  */
9229
9230 static int
9231 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
9232 {
9233   /* No RIP address.  */
9234   if (i.base_reg && i.base_reg->reg_num == RegIP)
9235     return 0;
9236
9237   /* No opcodes outside of base encoding space.  */
9238   if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9239     return 0;
9240
9241   /* add, sub without add/sub m, imm.  */
9242   if (i.tm.base_opcode <= 5
9243       || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
9244       || ((i.tm.base_opcode | 3) == 0x83
9245           && (i.tm.extension_opcode == 0x5
9246               || i.tm.extension_opcode == 0x0)))
9247     {
9248       *mf_cmp_p = mf_cmp_alu_cmp;
9249       return !(i.mem_operands && i.imm_operands);
9250     }
9251
9252   /* and without and m, imm.  */
9253   if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
9254       || ((i.tm.base_opcode | 3) == 0x83
9255           && i.tm.extension_opcode == 0x4))
9256     {
9257       *mf_cmp_p = mf_cmp_test_and;
9258       return !(i.mem_operands && i.imm_operands);
9259     }
9260
9261   /* test without test m imm.  */
9262   if ((i.tm.base_opcode | 1) == 0x85
9263       || (i.tm.base_opcode | 1) == 0xa9
9264       || ((i.tm.base_opcode | 1) == 0xf7
9265           && i.tm.extension_opcode == 0))
9266     {
9267       *mf_cmp_p = mf_cmp_test_and;
9268       return !(i.mem_operands && i.imm_operands);
9269     }
9270
9271   /* cmp without cmp m, imm.  */
9272   if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
9273       || ((i.tm.base_opcode | 3) == 0x83
9274           && (i.tm.extension_opcode == 0x7)))
9275     {
9276       *mf_cmp_p = mf_cmp_alu_cmp;
9277       return !(i.mem_operands && i.imm_operands);
9278     }
9279
9280   /* inc, dec without inc/dec m.   */
9281   if ((i.tm.cpu_flags.bitfield.cpuno64
9282        && (i.tm.base_opcode | 0xf) == 0x4f)
9283       || ((i.tm.base_opcode | 1) == 0xff
9284           && i.tm.extension_opcode <= 0x1))
9285     {
9286       *mf_cmp_p = mf_cmp_incdec;
9287       return !i.mem_operands;
9288     }
9289
9290   return 0;
9291 }
9292
9293 /* Return 1 if a FUSED_JCC_PADDING frag should be generated.  */
9294
9295 static int
9296 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9297 {
9298   /* NB: Don't work with COND_JUMP86 without i386.  */
9299   if (!align_branch_power
9300       || now_seg == absolute_section
9301       || !cpu_arch_flags.bitfield.cpui386
9302       || !(align_branch & align_branch_fused_bit))
9303     return 0;
9304
9305   if (maybe_fused_with_jcc_p (mf_cmp_p))
9306     {
9307       if (last_insn.kind == last_insn_other
9308           || last_insn.seg != now_seg)
9309         return 1;
9310       if (flag_debug)
9311         as_warn_where (last_insn.file, last_insn.line,
9312                        _("`%s` skips -malign-branch-boundary on `%s`"),
9313                        last_insn.name, i.tm.name);
9314     }
9315
9316   return 0;
9317 }
9318
9319 /* Return 1 if a BRANCH_PREFIX frag should be generated.  */
9320
9321 static int
9322 add_branch_prefix_frag_p (void)
9323 {
9324   /* NB: Don't work with COND_JUMP86 without i386.  Don't add prefix
9325      to PadLock instructions since they include prefixes in opcode.  */
9326   if (!align_branch_power
9327       || !align_branch_prefix_size
9328       || now_seg == absolute_section
9329       || i.tm.cpu_flags.bitfield.cpupadlock
9330       || !cpu_arch_flags.bitfield.cpui386)
9331     return 0;
9332
9333   /* Don't add prefix if it is a prefix or there is no operand in case
9334      that segment prefix is special.  */
9335   if (!i.operands || i.tm.opcode_modifier.isprefix)
9336     return 0;
9337
9338   if (last_insn.kind == last_insn_other
9339       || last_insn.seg != now_seg)
9340     return 1;
9341
9342   if (flag_debug)
9343     as_warn_where (last_insn.file, last_insn.line,
9344                    _("`%s` skips -malign-branch-boundary on `%s`"),
9345                    last_insn.name, i.tm.name);
9346
9347   return 0;
9348 }
9349
9350 /* Return 1 if a BRANCH_PADDING frag should be generated.  */
9351
9352 static int
9353 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9354                            enum mf_jcc_kind *mf_jcc_p)
9355 {
9356   int add_padding;
9357
9358   /* NB: Don't work with COND_JUMP86 without i386.  */
9359   if (!align_branch_power
9360       || now_seg == absolute_section
9361       || !cpu_arch_flags.bitfield.cpui386
9362       || i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9363     return 0;
9364
9365   add_padding = 0;
9366
9367   /* Check for jcc and direct jmp.  */
9368   if (i.tm.opcode_modifier.jump == JUMP)
9369     {
9370       if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9371         {
9372           *branch_p = align_branch_jmp;
9373           add_padding = align_branch & align_branch_jmp_bit;
9374         }
9375       else
9376         {
9377           /* Because J<cc> and JN<cc> share same group in macro-fusible table,
9378              igore the lowest bit.  */
9379           *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9380           *branch_p = align_branch_jcc;
9381           if ((align_branch & align_branch_jcc_bit))
9382             add_padding = 1;
9383         }
9384     }
9385   else if ((i.tm.base_opcode | 1) == 0xc3)
9386     {
9387       /* Near ret.  */
9388       *branch_p = align_branch_ret;
9389       if ((align_branch & align_branch_ret_bit))
9390         add_padding = 1;
9391     }
9392   else
9393     {
9394       /* Check for indirect jmp, direct and indirect calls.  */
9395       if (i.tm.base_opcode == 0xe8)
9396         {
9397           /* Direct call.  */
9398           *branch_p = align_branch_call;
9399           if ((align_branch & align_branch_call_bit))
9400             add_padding = 1;
9401         }
9402       else if (i.tm.base_opcode == 0xff
9403                && (i.tm.extension_opcode == 2
9404                    || i.tm.extension_opcode == 4))
9405         {
9406           /* Indirect call and jmp.  */
9407           *branch_p = align_branch_indirect;
9408           if ((align_branch & align_branch_indirect_bit))
9409             add_padding = 1;
9410         }
9411
9412       if (add_padding
9413           && i.disp_operands
9414           && tls_get_addr
9415           && (i.op[0].disps->X_op == O_symbol
9416               || (i.op[0].disps->X_op == O_subtract
9417                   && i.op[0].disps->X_op_symbol == GOT_symbol)))
9418         {
9419           symbolS *s = i.op[0].disps->X_add_symbol;
9420           /* No padding to call to global or undefined tls_get_addr.  */
9421           if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9422               && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9423             return 0;
9424         }
9425     }
9426
9427   if (add_padding
9428       && last_insn.kind != last_insn_other
9429       && last_insn.seg == now_seg)
9430     {
9431       if (flag_debug)
9432         as_warn_where (last_insn.file, last_insn.line,
9433                        _("`%s` skips -malign-branch-boundary on `%s`"),
9434                        last_insn.name, i.tm.name);
9435       return 0;
9436     }
9437
9438   return add_padding;
9439 }
9440
9441 static void
9442 output_insn (void)
9443 {
9444   fragS *insn_start_frag;
9445   offsetT insn_start_off;
9446   fragS *fragP = NULL;
9447   enum align_branch_kind branch = align_branch_none;
9448   /* The initializer is arbitrary just to avoid uninitialized error.
9449      it's actually either assigned in add_branch_padding_frag_p
9450      or never be used.  */
9451   enum mf_jcc_kind mf_jcc = mf_jcc_jo;
9452
9453 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9454   if (IS_ELF && x86_used_note && now_seg != absolute_section)
9455     {
9456       if ((i.xstate & xstate_tmm) == xstate_tmm
9457           || i.tm.cpu_flags.bitfield.cpuamx_tile)
9458         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
9459
9460       if (i.tm.cpu_flags.bitfield.cpu8087
9461           || i.tm.cpu_flags.bitfield.cpu287
9462           || i.tm.cpu_flags.bitfield.cpu387
9463           || i.tm.cpu_flags.bitfield.cpu687
9464           || i.tm.cpu_flags.bitfield.cpufisttp)
9465         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
9466
9467       if ((i.xstate & xstate_mmx)
9468           || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9469               && !is_any_vex_encoding (&i.tm)
9470               && (i.tm.base_opcode == 0x77 /* emms */
9471                   || i.tm.base_opcode == 0x0e /* femms */)))
9472         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
9473
9474       if (i.index_reg)
9475         {
9476           if (i.index_reg->reg_type.bitfield.zmmword)
9477             i.xstate |= xstate_zmm;
9478           else if (i.index_reg->reg_type.bitfield.ymmword)
9479             i.xstate |= xstate_ymm;
9480           else if (i.index_reg->reg_type.bitfield.xmmword)
9481             i.xstate |= xstate_xmm;
9482         }
9483
9484       /* vzeroall / vzeroupper */
9485       if (i.tm.base_opcode == 0x77 && i.tm.cpu_flags.bitfield.cpuavx)
9486         i.xstate |= xstate_ymm;
9487
9488       if ((i.xstate & xstate_xmm)
9489           /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
9490           || (i.tm.base_opcode == 0xae
9491               && (i.tm.cpu_flags.bitfield.cpusse
9492                   || i.tm.cpu_flags.bitfield.cpuavx))
9493           || i.tm.cpu_flags.bitfield.cpuwidekl
9494           || i.tm.cpu_flags.bitfield.cpukl)
9495         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
9496
9497       if ((i.xstate & xstate_ymm) == xstate_ymm)
9498         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
9499       if ((i.xstate & xstate_zmm) == xstate_zmm)
9500         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
9501       if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
9502         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
9503       if (i.tm.cpu_flags.bitfield.cpufxsr)
9504         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
9505       if (i.tm.cpu_flags.bitfield.cpuxsave)
9506         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
9507       if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
9508         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
9509       if (i.tm.cpu_flags.bitfield.cpuxsavec)
9510         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
9511
9512       if (x86_feature_2_used
9513           || i.tm.cpu_flags.bitfield.cpucmov
9514           || i.tm.cpu_flags.bitfield.cpusyscall
9515           || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9516               && i.tm.base_opcode == 0xc7
9517               && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
9518               && i.tm.extension_opcode == 1) /* cmpxchg8b */)
9519         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
9520       if (i.tm.cpu_flags.bitfield.cpusse3
9521           || i.tm.cpu_flags.bitfield.cpussse3
9522           || i.tm.cpu_flags.bitfield.cpusse4_1
9523           || i.tm.cpu_flags.bitfield.cpusse4_2
9524           || i.tm.cpu_flags.bitfield.cpucx16
9525           || i.tm.cpu_flags.bitfield.cpupopcnt
9526           /* LAHF-SAHF insns in 64-bit mode.  */
9527           || (flag_code == CODE_64BIT
9528               && (i.tm.base_opcode | 1) == 0x9f
9529               && i.tm.opcode_modifier.opcodespace == SPACE_BASE))
9530         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
9531       if (i.tm.cpu_flags.bitfield.cpuavx
9532           || i.tm.cpu_flags.bitfield.cpuavx2
9533           /* Any VEX encoded insns execpt for AVX512F, AVX512BW, AVX512DQ,
9534              XOP, FMA4, LPW, TBM, and AMX.  */
9535           || (i.tm.opcode_modifier.vex
9536               && !i.tm.cpu_flags.bitfield.cpuavx512f
9537               && !i.tm.cpu_flags.bitfield.cpuavx512bw
9538               && !i.tm.cpu_flags.bitfield.cpuavx512dq
9539               && !i.tm.cpu_flags.bitfield.cpuxop
9540               && !i.tm.cpu_flags.bitfield.cpufma4
9541               && !i.tm.cpu_flags.bitfield.cpulwp
9542               && !i.tm.cpu_flags.bitfield.cputbm
9543               && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
9544           || i.tm.cpu_flags.bitfield.cpuf16c
9545           || i.tm.cpu_flags.bitfield.cpufma
9546           || i.tm.cpu_flags.bitfield.cpulzcnt
9547           || i.tm.cpu_flags.bitfield.cpumovbe
9548           || i.tm.cpu_flags.bitfield.cpuxsaves
9549           || (x86_feature_2_used
9550               & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
9551                  | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
9552                  | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
9553         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
9554       if (i.tm.cpu_flags.bitfield.cpuavx512f
9555           || i.tm.cpu_flags.bitfield.cpuavx512bw
9556           || i.tm.cpu_flags.bitfield.cpuavx512dq
9557           || i.tm.cpu_flags.bitfield.cpuavx512vl
9558           /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
9559              AVX512-4FMAPS, and AVX512-4VNNIW.  */
9560           || (i.tm.opcode_modifier.evex
9561               && !i.tm.cpu_flags.bitfield.cpuavx512er
9562               && !i.tm.cpu_flags.bitfield.cpuavx512pf
9563               && !i.tm.cpu_flags.bitfield.cpuavx512_4fmaps
9564               && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
9565         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
9566     }
9567 #endif
9568
9569   /* Tie dwarf2 debug info to the address at the start of the insn.
9570      We can't do this after the insn has been output as the current
9571      frag may have been closed off.  eg. by frag_var.  */
9572   dwarf2_emit_insn (0);
9573
9574   insn_start_frag = frag_now;
9575   insn_start_off = frag_now_fix ();
9576
9577   if (add_branch_padding_frag_p (&branch, &mf_jcc))
9578     {
9579       char *p;
9580       /* Branch can be 8 bytes.  Leave some room for prefixes.  */
9581       unsigned int max_branch_padding_size = 14;
9582
9583       /* Align section to boundary.  */
9584       record_alignment (now_seg, align_branch_power);
9585
9586       /* Make room for padding.  */
9587       frag_grow (max_branch_padding_size);
9588
9589       /* Start of the padding.  */
9590       p = frag_more (0);
9591
9592       fragP = frag_now;
9593
9594       frag_var (rs_machine_dependent, max_branch_padding_size, 0,
9595                 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
9596                 NULL, 0, p);
9597
9598       fragP->tc_frag_data.mf_type = mf_jcc;
9599       fragP->tc_frag_data.branch_type = branch;
9600       fragP->tc_frag_data.max_bytes = max_branch_padding_size;
9601     }
9602
9603   if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
9604       && !pre_386_16bit_warned)
9605     {
9606       as_warn (_("use .code16 to ensure correct addressing mode"));
9607       pre_386_16bit_warned = true;
9608     }
9609
9610   /* Output jumps.  */
9611   if (i.tm.opcode_modifier.jump == JUMP)
9612     output_branch ();
9613   else if (i.tm.opcode_modifier.jump == JUMP_BYTE
9614            || i.tm.opcode_modifier.jump == JUMP_DWORD)
9615     output_jump ();
9616   else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
9617     output_interseg_jump ();
9618   else
9619     {
9620       /* Output normal instructions here.  */
9621       char *p;
9622       unsigned char *q;
9623       unsigned int j;
9624       enum mf_cmp_kind mf_cmp;
9625
9626       if (avoid_fence
9627           && (i.tm.base_opcode == 0xaee8
9628               || i.tm.base_opcode == 0xaef0
9629               || i.tm.base_opcode == 0xaef8))
9630         {
9631           /* Encode lfence, mfence, and sfence as
9632              f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
9633           if (flag_code == CODE_16BIT)
9634             as_bad (_("Cannot convert `%s' in 16-bit mode"), i.tm.name);
9635           else if (omit_lock_prefix)
9636             as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
9637                     i.tm.name);
9638           else if (now_seg != absolute_section)
9639             {
9640               offsetT val = 0x240483f0ULL;
9641
9642               p = frag_more (5);
9643               md_number_to_chars (p, val, 5);
9644             }
9645           else
9646             abs_section_offset += 5;
9647           return;
9648         }
9649
9650       /* Some processors fail on LOCK prefix. This options makes
9651          assembler ignore LOCK prefix and serves as a workaround.  */
9652       if (omit_lock_prefix)
9653         {
9654           if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
9655               && i.tm.opcode_modifier.isprefix)
9656             return;
9657           i.prefix[LOCK_PREFIX] = 0;
9658         }
9659
9660       if (branch)
9661         /* Skip if this is a branch.  */
9662         ;
9663       else if (add_fused_jcc_padding_frag_p (&mf_cmp))
9664         {
9665           /* Make room for padding.  */
9666           frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
9667           p = frag_more (0);
9668
9669           fragP = frag_now;
9670
9671           frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
9672                     ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
9673                     NULL, 0, p);
9674
9675           fragP->tc_frag_data.mf_type = mf_cmp;
9676           fragP->tc_frag_data.branch_type = align_branch_fused;
9677           fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
9678         }
9679       else if (add_branch_prefix_frag_p ())
9680         {
9681           unsigned int max_prefix_size = align_branch_prefix_size;
9682
9683           /* Make room for padding.  */
9684           frag_grow (max_prefix_size);
9685           p = frag_more (0);
9686
9687           fragP = frag_now;
9688
9689           frag_var (rs_machine_dependent, max_prefix_size, 0,
9690                     ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
9691                     NULL, 0, p);
9692
9693           fragP->tc_frag_data.max_bytes = max_prefix_size;
9694         }
9695
9696       /* Since the VEX/EVEX prefix contains the implicit prefix, we
9697          don't need the explicit prefix.  */
9698       if (!is_any_vex_encoding (&i.tm))
9699         {
9700           switch (i.tm.opcode_modifier.opcodeprefix)
9701             {
9702             case PREFIX_0X66:
9703               add_prefix (0x66);
9704               break;
9705             case PREFIX_0XF2:
9706               add_prefix (0xf2);
9707               break;
9708             case PREFIX_0XF3:
9709               if (!i.tm.cpu_flags.bitfield.cpupadlock
9710                   || (i.prefix[REP_PREFIX] != 0xf3))
9711                 add_prefix (0xf3);
9712               break;
9713             case PREFIX_NONE:
9714               switch (i.opcode_length)
9715                 {
9716                 case 2:
9717                   break;
9718                 case 1:
9719                   /* Check for pseudo prefixes.  */
9720                   if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
9721                     break;
9722                   as_bad_where (insn_start_frag->fr_file,
9723                                 insn_start_frag->fr_line,
9724                                 _("pseudo prefix without instruction"));
9725                   return;
9726                 default:
9727                   abort ();
9728                 }
9729               break;
9730             default:
9731               abort ();
9732             }
9733
9734 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9735           /* For x32, add a dummy REX_OPCODE prefix for mov/add with
9736              R_X86_64_GOTTPOFF relocation so that linker can safely
9737              perform IE->LE optimization.  A dummy REX_OPCODE prefix
9738              is also needed for lea with R_X86_64_GOTPC32_TLSDESC
9739              relocation for GDesc -> IE/LE optimization.  */
9740           if (x86_elf_abi == X86_64_X32_ABI
9741               && i.operands == 2
9742               && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
9743                   || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
9744               && i.prefix[REX_PREFIX] == 0)
9745             add_prefix (REX_OPCODE);
9746 #endif
9747
9748           /* The prefix bytes.  */
9749           for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
9750             if (*q)
9751               frag_opcode_byte (*q);
9752         }
9753       else
9754         {
9755           for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
9756             if (*q)
9757               switch (j)
9758                 {
9759                 case SEG_PREFIX:
9760                 case ADDR_PREFIX:
9761                   frag_opcode_byte (*q);
9762                   break;
9763                 default:
9764                   /* There should be no other prefixes for instructions
9765                      with VEX prefix.  */
9766                   abort ();
9767                 }
9768
9769           /* For EVEX instructions i.vrex should become 0 after
9770              build_evex_prefix.  For VEX instructions upper 16 registers
9771              aren't available, so VREX should be 0.  */
9772           if (i.vrex)
9773             abort ();
9774           /* Now the VEX prefix.  */
9775           if (now_seg != absolute_section)
9776             {
9777               p = frag_more (i.vex.length);
9778               for (j = 0; j < i.vex.length; j++)
9779                 p[j] = i.vex.bytes[j];
9780             }
9781           else
9782             abs_section_offset += i.vex.length;
9783         }
9784
9785       /* Now the opcode; be careful about word order here!  */
9786       j = i.opcode_length;
9787       if (!i.vex.length)
9788         switch (i.tm.opcode_modifier.opcodespace)
9789           {
9790           case SPACE_BASE:
9791             break;
9792           case SPACE_0F:
9793             ++j;
9794             break;
9795           case SPACE_0F38:
9796           case SPACE_0F3A:
9797             j += 2;
9798             break;
9799           default:
9800             abort ();
9801           }
9802
9803       if (now_seg == absolute_section)
9804         abs_section_offset += j;
9805       else if (j == 1)
9806         {
9807           FRAG_APPEND_1_CHAR (i.tm.base_opcode);
9808         }
9809       else
9810         {
9811           p = frag_more (j);
9812           if (!i.vex.length
9813               && i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9814             {
9815               *p++ = 0x0f;
9816               if (i.tm.opcode_modifier.opcodespace != SPACE_0F)
9817                 *p++ = i.tm.opcode_modifier.opcodespace == SPACE_0F38
9818                        ? 0x38 : 0x3a;
9819             }
9820
9821           switch (i.opcode_length)
9822             {
9823             case 2:
9824               /* Put out high byte first: can't use md_number_to_chars!  */
9825               *p++ = (i.tm.base_opcode >> 8) & 0xff;
9826               /* Fall through.  */
9827             case 1:
9828               *p = i.tm.base_opcode & 0xff;
9829               break;
9830             default:
9831               abort ();
9832               break;
9833             }
9834
9835         }
9836
9837       /* Now the modrm byte and sib byte (if present).  */
9838       if (i.tm.opcode_modifier.modrm)
9839         {
9840           frag_opcode_byte ((i.rm.regmem << 0)
9841                              | (i.rm.reg << 3)
9842                              | (i.rm.mode << 6));
9843           /* If i.rm.regmem == ESP (4)
9844              && i.rm.mode != (Register mode)
9845              && not 16 bit
9846              ==> need second modrm byte.  */
9847           if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
9848               && i.rm.mode != 3
9849               && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
9850             frag_opcode_byte ((i.sib.base << 0)
9851                               | (i.sib.index << 3)
9852                               | (i.sib.scale << 6));
9853         }
9854
9855       if (i.disp_operands)
9856         output_disp (insn_start_frag, insn_start_off);
9857
9858       if (i.imm_operands)
9859         output_imm (insn_start_frag, insn_start_off);
9860
9861       /*
9862        * frag_now_fix () returning plain abs_section_offset when we're in the
9863        * absolute section, and abs_section_offset not getting updated as data
9864        * gets added to the frag breaks the logic below.
9865        */
9866       if (now_seg != absolute_section)
9867         {
9868           j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
9869           if (j > 15)
9870             as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
9871                      j);
9872           else if (fragP)
9873             {
9874               /* NB: Don't add prefix with GOTPC relocation since
9875                  output_disp() above depends on the fixed encoding
9876                  length.  Can't add prefix with TLS relocation since
9877                  it breaks TLS linker optimization.  */
9878               unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
9879               /* Prefix count on the current instruction.  */
9880               unsigned int count = i.vex.length;
9881               unsigned int k;
9882               for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
9883                 /* REX byte is encoded in VEX/EVEX prefix.  */
9884                 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
9885                   count++;
9886
9887               /* Count prefixes for extended opcode maps.  */
9888               if (!i.vex.length)
9889                 switch (i.tm.opcode_modifier.opcodespace)
9890                   {
9891                   case SPACE_BASE:
9892                     break;
9893                   case SPACE_0F:
9894                     count++;
9895                     break;
9896                   case SPACE_0F38:
9897                   case SPACE_0F3A:
9898                     count += 2;
9899                     break;
9900                   default:
9901                     abort ();
9902                   }
9903
9904               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
9905                   == BRANCH_PREFIX)
9906                 {
9907                   /* Set the maximum prefix size in BRANCH_PREFIX
9908                      frag.  */
9909                   if (fragP->tc_frag_data.max_bytes > max)
9910                     fragP->tc_frag_data.max_bytes = max;
9911                   if (fragP->tc_frag_data.max_bytes > count)
9912                     fragP->tc_frag_data.max_bytes -= count;
9913                   else
9914                     fragP->tc_frag_data.max_bytes = 0;
9915                 }
9916               else
9917                 {
9918                   /* Remember the maximum prefix size in FUSED_JCC_PADDING
9919                      frag.  */
9920                   unsigned int max_prefix_size;
9921                   if (align_branch_prefix_size > max)
9922                     max_prefix_size = max;
9923                   else
9924                     max_prefix_size = align_branch_prefix_size;
9925                   if (max_prefix_size > count)
9926                     fragP->tc_frag_data.max_prefix_length
9927                       = max_prefix_size - count;
9928                 }
9929
9930               /* Use existing segment prefix if possible.  Use CS
9931                  segment prefix in 64-bit mode.  In 32-bit mode, use SS
9932                  segment prefix with ESP/EBP base register and use DS
9933                  segment prefix without ESP/EBP base register.  */
9934               if (i.prefix[SEG_PREFIX])
9935                 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
9936               else if (flag_code == CODE_64BIT)
9937                 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
9938               else if (i.base_reg
9939                        && (i.base_reg->reg_num == 4
9940                            || i.base_reg->reg_num == 5))
9941                 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
9942               else
9943                 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
9944             }
9945         }
9946     }
9947
9948   /* NB: Don't work with COND_JUMP86 without i386.  */
9949   if (align_branch_power
9950       && now_seg != absolute_section
9951       && cpu_arch_flags.bitfield.cpui386)
9952     {
9953       /* Terminate each frag so that we can add prefix and check for
9954          fused jcc.  */
9955       frag_wane (frag_now);
9956       frag_new (0);
9957     }
9958
9959 #ifdef DEBUG386
9960   if (flag_debug)
9961     {
9962       pi ("" /*line*/, &i);
9963     }
9964 #endif /* DEBUG386  */
9965 }
9966
9967 /* Return the size of the displacement operand N.  */
9968
9969 static int
9970 disp_size (unsigned int n)
9971 {
9972   int size = 4;
9973
9974   if (i.types[n].bitfield.disp64)
9975     size = 8;
9976   else if (i.types[n].bitfield.disp8)
9977     size = 1;
9978   else if (i.types[n].bitfield.disp16)
9979     size = 2;
9980   return size;
9981 }
9982
9983 /* Return the size of the immediate operand N.  */
9984
9985 static int
9986 imm_size (unsigned int n)
9987 {
9988   int size = 4;
9989   if (i.types[n].bitfield.imm64)
9990     size = 8;
9991   else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
9992     size = 1;
9993   else if (i.types[n].bitfield.imm16)
9994     size = 2;
9995   return size;
9996 }
9997
9998 static void
9999 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
10000 {
10001   char *p;
10002   unsigned int n;
10003
10004   for (n = 0; n < i.operands; n++)
10005     {
10006       if (operand_type_check (i.types[n], disp))
10007         {
10008           int size = disp_size (n);
10009
10010           if (now_seg == absolute_section)
10011             abs_section_offset += size;
10012           else if (i.op[n].disps->X_op == O_constant)
10013             {
10014               offsetT val = i.op[n].disps->X_add_number;
10015
10016               val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
10017                                      size);
10018               p = frag_more (size);
10019               md_number_to_chars (p, val, size);
10020             }
10021           else
10022             {
10023               enum bfd_reloc_code_real reloc_type;
10024               bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
10025               bool sign = (flag_code == CODE_64BIT && size == 4
10026                            && (!want_disp32 (&i.tm)
10027                                || (i.tm.opcode_modifier.jump && !i.jumpabsolute
10028                                    && !i.types[n].bitfield.baseindex)))
10029                           || pcrel;
10030               fixS *fixP;
10031
10032               /* We can't have 8 bit displacement here.  */
10033               gas_assert (!i.types[n].bitfield.disp8);
10034
10035               /* The PC relative address is computed relative
10036                  to the instruction boundary, so in case immediate
10037                  fields follows, we need to adjust the value.  */
10038               if (pcrel && i.imm_operands)
10039                 {
10040                   unsigned int n1;
10041                   int sz = 0;
10042
10043                   for (n1 = 0; n1 < i.operands; n1++)
10044                     if (operand_type_check (i.types[n1], imm))
10045                       {
10046                         /* Only one immediate is allowed for PC
10047                            relative address.  */
10048                         gas_assert (sz == 0);
10049                         sz = imm_size (n1);
10050                         i.op[n].disps->X_add_number -= sz;
10051                       }
10052                   /* We should find the immediate.  */
10053                   gas_assert (sz != 0);
10054                 }
10055
10056               p = frag_more (size);
10057               reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
10058               if (GOT_symbol
10059                   && GOT_symbol == i.op[n].disps->X_add_symbol
10060                   && (((reloc_type == BFD_RELOC_32
10061                         || reloc_type == BFD_RELOC_X86_64_32S
10062                         || (reloc_type == BFD_RELOC_64
10063                             && object_64bit))
10064                        && (i.op[n].disps->X_op == O_symbol
10065                            || (i.op[n].disps->X_op == O_add
10066                                && ((symbol_get_value_expression
10067                                     (i.op[n].disps->X_op_symbol)->X_op)
10068                                    == O_subtract))))
10069                       || reloc_type == BFD_RELOC_32_PCREL))
10070                 {
10071                   if (!object_64bit)
10072                     {
10073                       reloc_type = BFD_RELOC_386_GOTPC;
10074                       i.has_gotpc_tls_reloc = true;
10075                       i.op[n].disps->X_add_number +=
10076                         encoding_length (insn_start_frag, insn_start_off, p);
10077                     }
10078                   else if (reloc_type == BFD_RELOC_64)
10079                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
10080                   else
10081                     /* Don't do the adjustment for x86-64, as there
10082                        the pcrel addressing is relative to the _next_
10083                        insn, and that is taken care of in other code.  */
10084                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
10085                 }
10086               else if (align_branch_power)
10087                 {
10088                   switch (reloc_type)
10089                     {
10090                     case BFD_RELOC_386_TLS_GD:
10091                     case BFD_RELOC_386_TLS_LDM:
10092                     case BFD_RELOC_386_TLS_IE:
10093                     case BFD_RELOC_386_TLS_IE_32:
10094                     case BFD_RELOC_386_TLS_GOTIE:
10095                     case BFD_RELOC_386_TLS_GOTDESC:
10096                     case BFD_RELOC_386_TLS_DESC_CALL:
10097                     case BFD_RELOC_X86_64_TLSGD:
10098                     case BFD_RELOC_X86_64_TLSLD:
10099                     case BFD_RELOC_X86_64_GOTTPOFF:
10100                     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
10101                     case BFD_RELOC_X86_64_TLSDESC_CALL:
10102                       i.has_gotpc_tls_reloc = true;
10103                     default:
10104                       break;
10105                     }
10106                 }
10107               fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
10108                                   size, i.op[n].disps, pcrel,
10109                                   reloc_type);
10110
10111               if (flag_code == CODE_64BIT && size == 4 && pcrel
10112                   && !i.prefix[ADDR_PREFIX])
10113                 fixP->fx_signed = 1;
10114
10115               /* Check for "call/jmp *mem", "mov mem, %reg",
10116                  "test %reg, mem" and "binop mem, %reg" where binop
10117                  is one of adc, add, and, cmp, or, sbb, sub, xor
10118                  instructions without data prefix.  Always generate
10119                  R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
10120               if (i.prefix[DATA_PREFIX] == 0
10121                   && (generate_relax_relocations
10122                       || (!object_64bit
10123                           && i.rm.mode == 0
10124                           && i.rm.regmem == 5))
10125                   && (i.rm.mode == 2
10126                       || (i.rm.mode == 0 && i.rm.regmem == 5))
10127                   && i.tm.opcode_modifier.opcodespace == SPACE_BASE
10128                   && ((i.operands == 1
10129                        && i.tm.base_opcode == 0xff
10130                        && (i.rm.reg == 2 || i.rm.reg == 4))
10131                       || (i.operands == 2
10132                           && (i.tm.base_opcode == 0x8b
10133                               || i.tm.base_opcode == 0x85
10134                               || (i.tm.base_opcode & ~0x38) == 0x03))))
10135                 {
10136                   if (object_64bit)
10137                     {
10138                       fixP->fx_tcbit = i.rex != 0;
10139                       if (i.base_reg
10140                           && (i.base_reg->reg_num == RegIP))
10141                       fixP->fx_tcbit2 = 1;
10142                     }
10143                   else
10144                     fixP->fx_tcbit2 = 1;
10145                 }
10146             }
10147         }
10148     }
10149 }
10150
10151 static void
10152 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
10153 {
10154   char *p;
10155   unsigned int n;
10156
10157   for (n = 0; n < i.operands; n++)
10158     {
10159       if (operand_type_check (i.types[n], imm))
10160         {
10161           int size = imm_size (n);
10162
10163           if (now_seg == absolute_section)
10164             abs_section_offset += size;
10165           else if (i.op[n].imms->X_op == O_constant)
10166             {
10167               offsetT val;
10168
10169               val = offset_in_range (i.op[n].imms->X_add_number,
10170                                      size);
10171               p = frag_more (size);
10172               md_number_to_chars (p, val, size);
10173             }
10174           else
10175             {
10176               /* Not absolute_section.
10177                  Need a 32-bit fixup (don't support 8bit
10178                  non-absolute imms).  Try to support other
10179                  sizes ...  */
10180               enum bfd_reloc_code_real reloc_type;
10181               int sign;
10182
10183               if (i.types[n].bitfield.imm32s
10184                   && (i.suffix == QWORD_MNEM_SUFFIX
10185                       || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
10186                 sign = 1;
10187               else
10188                 sign = 0;
10189
10190               p = frag_more (size);
10191               reloc_type = reloc (size, 0, sign, i.reloc[n]);
10192
10193               /*   This is tough to explain.  We end up with this one if we
10194                * have operands that look like
10195                * "_GLOBAL_OFFSET_TABLE_+[.-.L284]".  The goal here is to
10196                * obtain the absolute address of the GOT, and it is strongly
10197                * preferable from a performance point of view to avoid using
10198                * a runtime relocation for this.  The actual sequence of
10199                * instructions often look something like:
10200                *
10201                *        call    .L66
10202                * .L66:
10203                *        popl    %ebx
10204                *        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
10205                *
10206                *   The call and pop essentially return the absolute address
10207                * of the label .L66 and store it in %ebx.  The linker itself
10208                * will ultimately change the first operand of the addl so
10209                * that %ebx points to the GOT, but to keep things simple, the
10210                * .o file must have this operand set so that it generates not
10211                * the absolute address of .L66, but the absolute address of
10212                * itself.  This allows the linker itself simply treat a GOTPC
10213                * relocation as asking for a pcrel offset to the GOT to be
10214                * added in, and the addend of the relocation is stored in the
10215                * operand field for the instruction itself.
10216                *
10217                *   Our job here is to fix the operand so that it would add
10218                * the correct offset so that %ebx would point to itself.  The
10219                * thing that is tricky is that .-.L66 will point to the
10220                * beginning of the instruction, so we need to further modify
10221                * the operand so that it will point to itself.  There are
10222                * other cases where you have something like:
10223                *
10224                *        .long   $_GLOBAL_OFFSET_TABLE_+[.-.L66]
10225                *
10226                * and here no correction would be required.  Internally in
10227                * the assembler we treat operands of this form as not being
10228                * pcrel since the '.' is explicitly mentioned, and I wonder
10229                * whether it would simplify matters to do it this way.  Who
10230                * knows.  In earlier versions of the PIC patches, the
10231                * pcrel_adjust field was used to store the correction, but
10232                * since the expression is not pcrel, I felt it would be
10233                * confusing to do it this way.  */
10234
10235               if ((reloc_type == BFD_RELOC_32
10236                    || reloc_type == BFD_RELOC_X86_64_32S
10237                    || reloc_type == BFD_RELOC_64)
10238                   && GOT_symbol
10239                   && GOT_symbol == i.op[n].imms->X_add_symbol
10240                   && (i.op[n].imms->X_op == O_symbol
10241                       || (i.op[n].imms->X_op == O_add
10242                           && ((symbol_get_value_expression
10243                                (i.op[n].imms->X_op_symbol)->X_op)
10244                               == O_subtract))))
10245                 {
10246                   if (!object_64bit)
10247                     reloc_type = BFD_RELOC_386_GOTPC;
10248                   else if (size == 4)
10249                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
10250                   else if (size == 8)
10251                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
10252                   i.has_gotpc_tls_reloc = true;
10253                   i.op[n].imms->X_add_number +=
10254                     encoding_length (insn_start_frag, insn_start_off, p);
10255                 }
10256               fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10257                            i.op[n].imms, 0, reloc_type);
10258             }
10259         }
10260     }
10261 }
10262 \f
10263 /* x86_cons_fix_new is called via the expression parsing code when a
10264    reloc is needed.  We use this hook to get the correct .got reloc.  */
10265 static int cons_sign = -1;
10266
10267 void
10268 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
10269                   expressionS *exp, bfd_reloc_code_real_type r)
10270 {
10271   r = reloc (len, 0, cons_sign, r);
10272
10273 #ifdef TE_PE
10274   if (exp->X_op == O_secrel)
10275     {
10276       exp->X_op = O_symbol;
10277       r = BFD_RELOC_32_SECREL;
10278     }
10279   else if (exp->X_op == O_secidx)
10280     r = BFD_RELOC_16_SECIDX;
10281 #endif
10282
10283   fix_new_exp (frag, off, len, exp, 0, r);
10284 }
10285
10286 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
10287    purpose of the `.dc.a' internal pseudo-op.  */
10288
10289 int
10290 x86_address_bytes (void)
10291 {
10292   if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
10293     return 4;
10294   return stdoutput->arch_info->bits_per_address / 8;
10295 }
10296
10297 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
10298      || defined (LEX_AT)) && !defined (TE_PE)
10299 # define lex_got(reloc, adjust, types) NULL
10300 #else
10301 /* Parse operands of the form
10302    <symbol>@GOTOFF+<nnn>
10303    and similar .plt or .got references.
10304
10305    If we find one, set up the correct relocation in RELOC and copy the
10306    input string, minus the `@GOTOFF' into a malloc'd buffer for
10307    parsing by the calling routine.  Return this buffer, and if ADJUST
10308    is non-null set it to the length of the string we removed from the
10309    input line.  Otherwise return NULL.  */
10310 static char *
10311 lex_got (enum bfd_reloc_code_real *rel,
10312          int *adjust,
10313          i386_operand_type *types)
10314 {
10315   /* Some of the relocations depend on the size of what field is to
10316      be relocated.  But in our callers i386_immediate and i386_displacement
10317      we don't yet know the operand size (this will be set by insn
10318      matching).  Hence we record the word32 relocation here,
10319      and adjust the reloc according to the real size in reloc().  */
10320   static const struct
10321   {
10322     const char *str;
10323     int len;
10324     const enum bfd_reloc_code_real rel[2];
10325     const i386_operand_type types64;
10326     bool need_GOT_symbol;
10327   }
10328     gotrel[] =
10329   {
10330 #ifndef TE_PE
10331 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10332     { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
10333                                         BFD_RELOC_SIZE32 },
10334       OPERAND_TYPE_IMM32_64, false },
10335 #endif
10336     { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
10337                                        BFD_RELOC_X86_64_PLTOFF64 },
10338       OPERAND_TYPE_IMM64, true },
10339     { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
10340                                        BFD_RELOC_X86_64_PLT32    },
10341       OPERAND_TYPE_IMM32_32S_DISP32, false },
10342     { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
10343                                        BFD_RELOC_X86_64_GOTPLT64 },
10344       OPERAND_TYPE_IMM64_DISP64, true },
10345     { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
10346                                        BFD_RELOC_X86_64_GOTOFF64 },
10347       OPERAND_TYPE_IMM64_DISP64, true },
10348     { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
10349                                        BFD_RELOC_X86_64_GOTPCREL },
10350       OPERAND_TYPE_IMM32_32S_DISP32, true },
10351     { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
10352                                        BFD_RELOC_X86_64_TLSGD    },
10353       OPERAND_TYPE_IMM32_32S_DISP32, true },
10354     { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
10355                                        _dummy_first_bfd_reloc_code_real },
10356       OPERAND_TYPE_NONE, true },
10357     { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
10358                                        BFD_RELOC_X86_64_TLSLD    },
10359       OPERAND_TYPE_IMM32_32S_DISP32, true },
10360     { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
10361                                        BFD_RELOC_X86_64_GOTTPOFF },
10362       OPERAND_TYPE_IMM32_32S_DISP32, true },
10363     { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
10364                                        BFD_RELOC_X86_64_TPOFF32  },
10365       OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10366     { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
10367                                        _dummy_first_bfd_reloc_code_real },
10368       OPERAND_TYPE_NONE, true },
10369     { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
10370                                        BFD_RELOC_X86_64_DTPOFF32 },
10371       OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10372     { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
10373                                        _dummy_first_bfd_reloc_code_real },
10374       OPERAND_TYPE_NONE, true },
10375     { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
10376                                        _dummy_first_bfd_reloc_code_real },
10377       OPERAND_TYPE_NONE, true },
10378     { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
10379                                        BFD_RELOC_X86_64_GOT32    },
10380       OPERAND_TYPE_IMM32_32S_64_DISP32, true },
10381     { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
10382                                        BFD_RELOC_X86_64_GOTPC32_TLSDESC },
10383       OPERAND_TYPE_IMM32_32S_DISP32, true },
10384     { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
10385                                        BFD_RELOC_X86_64_TLSDESC_CALL },
10386       OPERAND_TYPE_IMM32_32S_DISP32, true },
10387 #else /* TE_PE */
10388     { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
10389                                        BFD_RELOC_32_SECREL },
10390       OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
10391 #endif
10392   };
10393   char *cp;
10394   unsigned int j;
10395
10396 #if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
10397   if (!IS_ELF)
10398     return NULL;
10399 #endif
10400
10401   for (cp = input_line_pointer; *cp != '@'; cp++)
10402     if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10403       return NULL;
10404
10405   for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10406     {
10407       int len = gotrel[j].len;
10408       if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10409         {
10410           if (gotrel[j].rel[object_64bit] != 0)
10411             {
10412               int first, second;
10413               char *tmpbuf, *past_reloc;
10414
10415               *rel = gotrel[j].rel[object_64bit];
10416
10417               if (types)
10418                 {
10419                   if (flag_code != CODE_64BIT)
10420                     {
10421                       types->bitfield.imm32 = 1;
10422                       types->bitfield.disp32 = 1;
10423                     }
10424                   else
10425                     *types = gotrel[j].types64;
10426                 }
10427
10428               if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
10429                 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
10430
10431               /* The length of the first part of our input line.  */
10432               first = cp - input_line_pointer;
10433
10434               /* The second part goes from after the reloc token until
10435                  (and including) an end_of_line char or comma.  */
10436               past_reloc = cp + 1 + len;
10437               cp = past_reloc;
10438               while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10439                 ++cp;
10440               second = cp + 1 - past_reloc;
10441
10442               /* Allocate and copy string.  The trailing NUL shouldn't
10443                  be necessary, but be safe.  */
10444               tmpbuf = XNEWVEC (char, first + second + 2);
10445               memcpy (tmpbuf, input_line_pointer, first);
10446               if (second != 0 && *past_reloc != ' ')
10447                 /* Replace the relocation token with ' ', so that
10448                    errors like foo@GOTOFF1 will be detected.  */
10449                 tmpbuf[first++] = ' ';
10450               else
10451                 /* Increment length by 1 if the relocation token is
10452                    removed.  */
10453                 len++;
10454               if (adjust)
10455                 *adjust = len;
10456               memcpy (tmpbuf + first, past_reloc, second);
10457               tmpbuf[first + second] = '\0';
10458               return tmpbuf;
10459             }
10460
10461           as_bad (_("@%s reloc is not supported with %d-bit output format"),
10462                   gotrel[j].str, 1 << (5 + object_64bit));
10463           return NULL;
10464         }
10465     }
10466
10467   /* Might be a symbol version string.  Don't as_bad here.  */
10468   return NULL;
10469 }
10470 #endif
10471
10472 bfd_reloc_code_real_type
10473 x86_cons (expressionS *exp, int size)
10474 {
10475   bfd_reloc_code_real_type got_reloc = NO_RELOC;
10476
10477 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
10478       && !defined (LEX_AT)) \
10479     || defined (TE_PE)
10480   intel_syntax = -intel_syntax;
10481
10482   exp->X_md = 0;
10483   if (size == 4 || (object_64bit && size == 8))
10484     {
10485       /* Handle @GOTOFF and the like in an expression.  */
10486       char *save;
10487       char *gotfree_input_line;
10488       int adjust = 0;
10489
10490       save = input_line_pointer;
10491       gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10492       if (gotfree_input_line)
10493         input_line_pointer = gotfree_input_line;
10494
10495       expression (exp);
10496
10497       if (gotfree_input_line)
10498         {
10499           /* expression () has merrily parsed up to the end of line,
10500              or a comma - in the wrong buffer.  Transfer how far
10501              input_line_pointer has moved to the right buffer.  */
10502           input_line_pointer = (save
10503                                 + (input_line_pointer - gotfree_input_line)
10504                                 + adjust);
10505           free (gotfree_input_line);
10506           if (exp->X_op == O_constant
10507               || exp->X_op == O_absent
10508               || exp->X_op == O_illegal
10509               || exp->X_op == O_register
10510               || exp->X_op == O_big)
10511             {
10512               char c = *input_line_pointer;
10513               *input_line_pointer = 0;
10514               as_bad (_("missing or invalid expression `%s'"), save);
10515               *input_line_pointer = c;
10516             }
10517           else if ((got_reloc == BFD_RELOC_386_PLT32
10518                     || got_reloc == BFD_RELOC_X86_64_PLT32)
10519                    && exp->X_op != O_symbol)
10520             {
10521               char c = *input_line_pointer;
10522               *input_line_pointer = 0;
10523               as_bad (_("invalid PLT expression `%s'"), save);
10524               *input_line_pointer = c;
10525             }
10526         }
10527     }
10528   else
10529     expression (exp);
10530
10531   intel_syntax = -intel_syntax;
10532
10533   if (intel_syntax)
10534     i386_intel_simplify (exp);
10535 #else
10536   expression (exp);
10537 #endif
10538
10539   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
10540   if (size == 4 && exp->X_op == O_constant && !object_64bit)
10541     exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10542
10543   return got_reloc;
10544 }
10545
10546 static void
10547 signed_cons (int size)
10548 {
10549   if (object_64bit)
10550     cons_sign = 1;
10551   cons (size);
10552   cons_sign = -1;
10553 }
10554
10555 #ifdef TE_PE
10556 static void
10557 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
10558 {
10559   expressionS exp;
10560
10561   do
10562     {
10563       expression (&exp);
10564       if (exp.X_op == O_symbol)
10565         exp.X_op = O_secrel;
10566
10567       emit_expr (&exp, 4);
10568     }
10569   while (*input_line_pointer++ == ',');
10570
10571   input_line_pointer--;
10572   demand_empty_rest_of_line ();
10573 }
10574
10575 static void
10576 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
10577 {
10578   expressionS exp;
10579
10580   do
10581     {
10582       expression (&exp);
10583       if (exp.X_op == O_symbol)
10584         exp.X_op = O_secidx;
10585
10586       emit_expr (&exp, 2);
10587     }
10588   while (*input_line_pointer++ == ',');
10589
10590   input_line_pointer--;
10591   demand_empty_rest_of_line ();
10592 }
10593 #endif
10594
10595 /* Handle Rounding Control / SAE specifiers.  */
10596
10597 static char *
10598 RC_SAE_specifier (const char *pstr)
10599 {
10600   unsigned int j;
10601
10602   for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
10603     {
10604       if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
10605         {
10606           if (i.rounding.type != rc_none)
10607             {
10608               as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
10609               return NULL;
10610             }
10611
10612           i.rounding.type = RC_NamesTable[j].type;
10613
10614           return (char *)(pstr + RC_NamesTable[j].len);
10615         }
10616     }
10617
10618   return NULL;
10619 }
10620
10621 /* Handle Vector operations.  */
10622
10623 static char *
10624 check_VecOperations (char *op_string)
10625 {
10626   const reg_entry *mask;
10627   const char *saved;
10628   char *end_op;
10629
10630   while (*op_string)
10631     {
10632       saved = op_string;
10633       if (*op_string == '{')
10634         {
10635           op_string++;
10636
10637           /* Check broadcasts.  */
10638           if (startswith (op_string, "1to"))
10639             {
10640               unsigned int bcst_type;
10641
10642               if (i.broadcast.type)
10643                 goto duplicated_vec_op;
10644
10645               op_string += 3;
10646               if (*op_string == '8')
10647                 bcst_type = 8;
10648               else if (*op_string == '4')
10649                 bcst_type = 4;
10650               else if (*op_string == '2')
10651                 bcst_type = 2;
10652               else if (*op_string == '1'
10653                        && *(op_string+1) == '6')
10654                 {
10655                   bcst_type = 16;
10656                   op_string++;
10657                 }
10658               else if (*op_string == '3'
10659                        && *(op_string+1) == '2')
10660                 {
10661                   bcst_type = 32;
10662                   op_string++;
10663                 }
10664               else
10665                 {
10666                   as_bad (_("Unsupported broadcast: `%s'"), saved);
10667                   return NULL;
10668                 }
10669               op_string++;
10670
10671               i.broadcast.type = bcst_type;
10672               i.broadcast.operand = this_operand;
10673             }
10674           /* Check masking operation.  */
10675           else if ((mask = parse_register (op_string, &end_op)) != NULL)
10676             {
10677               if (mask == &bad_reg)
10678                 return NULL;
10679
10680               /* k0 can't be used for write mask.  */
10681               if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
10682                 {
10683                   as_bad (_("`%s%s' can't be used for write mask"),
10684                           register_prefix, mask->reg_name);
10685                   return NULL;
10686                 }
10687
10688               if (!i.mask.reg)
10689                 {
10690                   i.mask.reg = mask;
10691                   i.mask.operand = this_operand;
10692                 }
10693               else if (i.mask.reg->reg_num)
10694                 goto duplicated_vec_op;
10695               else
10696                 {
10697                   i.mask.reg = mask;
10698
10699                   /* Only "{z}" is allowed here.  No need to check
10700                      zeroing mask explicitly.  */
10701                   if (i.mask.operand != (unsigned int) this_operand)
10702                     {
10703                       as_bad (_("invalid write mask `%s'"), saved);
10704                       return NULL;
10705                     }
10706                 }
10707
10708               op_string = end_op;
10709             }
10710           /* Check zeroing-flag for masking operation.  */
10711           else if (*op_string == 'z')
10712             {
10713               if (!i.mask.reg)
10714                 {
10715                   i.mask.reg = reg_k0;
10716                   i.mask.zeroing = 1;
10717                   i.mask.operand = this_operand;
10718                 }
10719               else
10720                 {
10721                   if (i.mask.zeroing)
10722                     {
10723                     duplicated_vec_op:
10724                       as_bad (_("duplicated `%s'"), saved);
10725                       return NULL;
10726                     }
10727
10728                   i.mask.zeroing = 1;
10729
10730                   /* Only "{%k}" is allowed here.  No need to check mask
10731                      register explicitly.  */
10732                   if (i.mask.operand != (unsigned int) this_operand)
10733                     {
10734                       as_bad (_("invalid zeroing-masking `%s'"),
10735                               saved);
10736                       return NULL;
10737                     }
10738                 }
10739
10740               op_string++;
10741             }
10742           else if (intel_syntax
10743                    && (op_string = RC_SAE_specifier (op_string)) != NULL)
10744             i.rounding.modifier = true;
10745           else
10746             goto unknown_vec_op;
10747
10748           if (*op_string != '}')
10749             {
10750               as_bad (_("missing `}' in `%s'"), saved);
10751               return NULL;
10752             }
10753           op_string++;
10754
10755           /* Strip whitespace since the addition of pseudo prefixes
10756              changed how the scrubber treats '{'.  */
10757           if (is_space_char (*op_string))
10758             ++op_string;
10759
10760           continue;
10761         }
10762     unknown_vec_op:
10763       /* We don't know this one.  */
10764       as_bad (_("unknown vector operation: `%s'"), saved);
10765       return NULL;
10766     }
10767
10768   if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
10769     {
10770       as_bad (_("zeroing-masking only allowed with write mask"));
10771       return NULL;
10772     }
10773
10774   return op_string;
10775 }
10776
10777 static int
10778 i386_immediate (char *imm_start)
10779 {
10780   char *save_input_line_pointer;
10781   char *gotfree_input_line;
10782   segT exp_seg = 0;
10783   expressionS *exp;
10784   i386_operand_type types;
10785
10786   operand_type_set (&types, ~0);
10787
10788   if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
10789     {
10790       as_bad (_("at most %d immediate operands are allowed"),
10791               MAX_IMMEDIATE_OPERANDS);
10792       return 0;
10793     }
10794
10795   exp = &im_expressions[i.imm_operands++];
10796   i.op[this_operand].imms = exp;
10797
10798   if (is_space_char (*imm_start))
10799     ++imm_start;
10800
10801   save_input_line_pointer = input_line_pointer;
10802   input_line_pointer = imm_start;
10803
10804   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10805   if (gotfree_input_line)
10806     input_line_pointer = gotfree_input_line;
10807
10808   exp_seg = expression (exp);
10809
10810   SKIP_WHITESPACE ();
10811   if (*input_line_pointer)
10812     as_bad (_("junk `%s' after expression"), input_line_pointer);
10813
10814   input_line_pointer = save_input_line_pointer;
10815   if (gotfree_input_line)
10816     {
10817       free (gotfree_input_line);
10818
10819       if (exp->X_op == O_constant)
10820         exp->X_op = O_illegal;
10821     }
10822
10823   if (exp_seg == reg_section)
10824     {
10825       as_bad (_("illegal immediate register operand %s"), imm_start);
10826       return 0;
10827     }
10828
10829   return i386_finalize_immediate (exp_seg, exp, types, imm_start);
10830 }
10831
10832 static int
10833 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10834                          i386_operand_type types, const char *imm_start)
10835 {
10836   if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
10837     {
10838       if (imm_start)
10839         as_bad (_("missing or invalid immediate expression `%s'"),
10840                 imm_start);
10841       return 0;
10842     }
10843   else if (exp->X_op == O_constant)
10844     {
10845       /* Size it properly later.  */
10846       i.types[this_operand].bitfield.imm64 = 1;
10847
10848       /* If not 64bit, sign/zero extend val, to account for wraparound
10849          when !BFD64.  */
10850       if (flag_code != CODE_64BIT)
10851         exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10852     }
10853 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10854   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
10855            && exp_seg != absolute_section
10856            && exp_seg != text_section
10857            && exp_seg != data_section
10858            && exp_seg != bss_section
10859            && exp_seg != undefined_section
10860            && !bfd_is_com_section (exp_seg))
10861     {
10862       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10863       return 0;
10864     }
10865 #endif
10866   else
10867     {
10868       /* This is an address.  The size of the address will be
10869          determined later, depending on destination register,
10870          suffix, or the default for the section.  */
10871       i.types[this_operand].bitfield.imm8 = 1;
10872       i.types[this_operand].bitfield.imm16 = 1;
10873       i.types[this_operand].bitfield.imm32 = 1;
10874       i.types[this_operand].bitfield.imm32s = 1;
10875       i.types[this_operand].bitfield.imm64 = 1;
10876       i.types[this_operand] = operand_type_and (i.types[this_operand],
10877                                                 types);
10878     }
10879
10880   return 1;
10881 }
10882
10883 static char *
10884 i386_scale (char *scale)
10885 {
10886   offsetT val;
10887   char *save = input_line_pointer;
10888
10889   input_line_pointer = scale;
10890   val = get_absolute_expression ();
10891
10892   switch (val)
10893     {
10894     case 1:
10895       i.log2_scale_factor = 0;
10896       break;
10897     case 2:
10898       i.log2_scale_factor = 1;
10899       break;
10900     case 4:
10901       i.log2_scale_factor = 2;
10902       break;
10903     case 8:
10904       i.log2_scale_factor = 3;
10905       break;
10906     default:
10907       {
10908         char sep = *input_line_pointer;
10909
10910         *input_line_pointer = '\0';
10911         as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
10912                 scale);
10913         *input_line_pointer = sep;
10914         input_line_pointer = save;
10915         return NULL;
10916       }
10917     }
10918   if (i.log2_scale_factor != 0 && i.index_reg == 0)
10919     {
10920       as_warn (_("scale factor of %d without an index register"),
10921                1 << i.log2_scale_factor);
10922       i.log2_scale_factor = 0;
10923     }
10924   scale = input_line_pointer;
10925   input_line_pointer = save;
10926   return scale;
10927 }
10928
10929 static int
10930 i386_displacement (char *disp_start, char *disp_end)
10931 {
10932   expressionS *exp;
10933   segT exp_seg = 0;
10934   char *save_input_line_pointer;
10935   char *gotfree_input_line;
10936   int override;
10937   i386_operand_type bigdisp, types = anydisp;
10938   int ret;
10939
10940   if (i.disp_operands == MAX_MEMORY_OPERANDS)
10941     {
10942       as_bad (_("at most %d displacement operands are allowed"),
10943               MAX_MEMORY_OPERANDS);
10944       return 0;
10945     }
10946
10947   operand_type_set (&bigdisp, 0);
10948   if (i.jumpabsolute
10949       || i.types[this_operand].bitfield.baseindex
10950       || (current_templates->start->opcode_modifier.jump != JUMP
10951           && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
10952     {
10953       i386_addressing_mode ();
10954       override = (i.prefix[ADDR_PREFIX] != 0);
10955       if (flag_code == CODE_64BIT)
10956         {
10957           bigdisp.bitfield.disp32 = 1;
10958           if (!override)
10959             bigdisp.bitfield.disp64 = 1;
10960         }
10961       else if ((flag_code == CODE_16BIT) ^ override)
10962           bigdisp.bitfield.disp16 = 1;
10963       else
10964           bigdisp.bitfield.disp32 = 1;
10965     }
10966   else
10967     {
10968       /* For PC-relative branches, the width of the displacement may be
10969          dependent upon data size, but is never dependent upon address size.
10970          Also make sure to not unintentionally match against a non-PC-relative
10971          branch template.  */
10972       static templates aux_templates;
10973       const insn_template *t = current_templates->start;
10974       bool has_intel64 = false;
10975
10976       aux_templates.start = t;
10977       while (++t < current_templates->end)
10978         {
10979           if (t->opcode_modifier.jump
10980               != current_templates->start->opcode_modifier.jump)
10981             break;
10982           if ((t->opcode_modifier.isa64 >= INTEL64))
10983             has_intel64 = true;
10984         }
10985       if (t < current_templates->end)
10986         {
10987           aux_templates.end = t;
10988           current_templates = &aux_templates;
10989         }
10990
10991       override = (i.prefix[DATA_PREFIX] != 0);
10992       if (flag_code == CODE_64BIT)
10993         {
10994           if ((override || i.suffix == WORD_MNEM_SUFFIX)
10995               && (!intel64 || !has_intel64))
10996             bigdisp.bitfield.disp16 = 1;
10997           else
10998             bigdisp.bitfield.disp32 = 1;
10999         }
11000       else
11001         {
11002           if (!override)
11003             override = (i.suffix == (flag_code != CODE_16BIT
11004                                      ? WORD_MNEM_SUFFIX
11005                                      : LONG_MNEM_SUFFIX));
11006           bigdisp.bitfield.disp32 = 1;
11007           if ((flag_code == CODE_16BIT) ^ override)
11008             {
11009               bigdisp.bitfield.disp32 = 0;
11010               bigdisp.bitfield.disp16 = 1;
11011             }
11012         }
11013     }
11014   i.types[this_operand] = operand_type_or (i.types[this_operand],
11015                                            bigdisp);
11016
11017   exp = &disp_expressions[i.disp_operands];
11018   i.op[this_operand].disps = exp;
11019   i.disp_operands++;
11020   save_input_line_pointer = input_line_pointer;
11021   input_line_pointer = disp_start;
11022   END_STRING_AND_SAVE (disp_end);
11023
11024 #ifndef GCC_ASM_O_HACK
11025 #define GCC_ASM_O_HACK 0
11026 #endif
11027 #if GCC_ASM_O_HACK
11028   END_STRING_AND_SAVE (disp_end + 1);
11029   if (i.types[this_operand].bitfield.baseIndex
11030       && displacement_string_end[-1] == '+')
11031     {
11032       /* This hack is to avoid a warning when using the "o"
11033          constraint within gcc asm statements.
11034          For instance:
11035
11036          #define _set_tssldt_desc(n,addr,limit,type) \
11037          __asm__ __volatile__ ( \
11038          "movw %w2,%0\n\t" \
11039          "movw %w1,2+%0\n\t" \
11040          "rorl $16,%1\n\t" \
11041          "movb %b1,4+%0\n\t" \
11042          "movb %4,5+%0\n\t" \
11043          "movb $0,6+%0\n\t" \
11044          "movb %h1,7+%0\n\t" \
11045          "rorl $16,%1" \
11046          : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
11047
11048          This works great except that the output assembler ends
11049          up looking a bit weird if it turns out that there is
11050          no offset.  You end up producing code that looks like:
11051
11052          #APP
11053          movw $235,(%eax)
11054          movw %dx,2+(%eax)
11055          rorl $16,%edx
11056          movb %dl,4+(%eax)
11057          movb $137,5+(%eax)
11058          movb $0,6+(%eax)
11059          movb %dh,7+(%eax)
11060          rorl $16,%edx
11061          #NO_APP
11062
11063          So here we provide the missing zero.  */
11064
11065       *displacement_string_end = '0';
11066     }
11067 #endif
11068   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
11069   if (gotfree_input_line)
11070     input_line_pointer = gotfree_input_line;
11071
11072   exp_seg = expression (exp);
11073
11074   SKIP_WHITESPACE ();
11075   if (*input_line_pointer)
11076     as_bad (_("junk `%s' after expression"), input_line_pointer);
11077 #if GCC_ASM_O_HACK
11078   RESTORE_END_STRING (disp_end + 1);
11079 #endif
11080   input_line_pointer = save_input_line_pointer;
11081   if (gotfree_input_line)
11082     {
11083       free (gotfree_input_line);
11084
11085       if (exp->X_op == O_constant || exp->X_op == O_register)
11086         exp->X_op = O_illegal;
11087     }
11088
11089   ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
11090
11091   RESTORE_END_STRING (disp_end);
11092
11093   return ret;
11094 }
11095
11096 static int
11097 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
11098                             i386_operand_type types, const char *disp_start)
11099 {
11100   int ret = 1;
11101
11102   /* We do this to make sure that the section symbol is in
11103      the symbol table.  We will ultimately change the relocation
11104      to be relative to the beginning of the section.  */
11105   if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
11106       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
11107       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11108     {
11109       if (exp->X_op != O_symbol)
11110         goto inv_disp;
11111
11112       if (S_IS_LOCAL (exp->X_add_symbol)
11113           && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
11114           && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
11115         section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
11116       exp->X_op = O_subtract;
11117       exp->X_op_symbol = GOT_symbol;
11118       if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
11119         i.reloc[this_operand] = BFD_RELOC_32_PCREL;
11120       else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11121         i.reloc[this_operand] = BFD_RELOC_64;
11122       else
11123         i.reloc[this_operand] = BFD_RELOC_32;
11124     }
11125
11126   else if (exp->X_op == O_absent
11127            || exp->X_op == O_illegal
11128            || exp->X_op == O_big)
11129     {
11130     inv_disp:
11131       as_bad (_("missing or invalid displacement expression `%s'"),
11132               disp_start);
11133       ret = 0;
11134     }
11135
11136   else if (exp->X_op == O_constant)
11137     {
11138       /* Sizing gets taken care of by optimize_disp().
11139
11140          If not 64bit, sign/zero extend val, to account for wraparound
11141          when !BFD64.  */
11142       if (flag_code != CODE_64BIT)
11143         exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11144     }
11145
11146 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11147   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11148            && exp_seg != absolute_section
11149            && exp_seg != text_section
11150            && exp_seg != data_section
11151            && exp_seg != bss_section
11152            && exp_seg != undefined_section
11153            && !bfd_is_com_section (exp_seg))
11154     {
11155       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11156       ret = 0;
11157     }
11158 #endif
11159
11160   else if (current_templates->start->opcode_modifier.jump == JUMP_BYTE)
11161     i.types[this_operand].bitfield.disp8 = 1;
11162
11163   /* Check if this is a displacement only operand.  */
11164   if (!i.types[this_operand].bitfield.baseindex)
11165     i.types[this_operand] =
11166       operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
11167                        operand_type_and (i.types[this_operand], types));
11168
11169   return ret;
11170 }
11171
11172 /* Return the active addressing mode, taking address override and
11173    registers forming the address into consideration.  Update the
11174    address override prefix if necessary.  */
11175
11176 static enum flag_code
11177 i386_addressing_mode (void)
11178 {
11179   enum flag_code addr_mode;
11180
11181   if (i.prefix[ADDR_PREFIX])
11182     addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
11183   else if (flag_code == CODE_16BIT
11184            && current_templates->start->cpu_flags.bitfield.cpumpx
11185            /* Avoid replacing the "16-bit addressing not allowed" diagnostic
11186               from md_assemble() by "is not a valid base/index expression"
11187               when there is a base and/or index.  */
11188            && !i.types[this_operand].bitfield.baseindex)
11189     {
11190       /* MPX insn memory operands with neither base nor index must be forced
11191          to use 32-bit addressing in 16-bit mode.  */
11192       addr_mode = CODE_32BIT;
11193       i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11194       ++i.prefixes;
11195       gas_assert (!i.types[this_operand].bitfield.disp16);
11196       gas_assert (!i.types[this_operand].bitfield.disp32);
11197     }
11198   else
11199     {
11200       addr_mode = flag_code;
11201
11202 #if INFER_ADDR_PREFIX
11203       if (i.mem_operands == 0)
11204         {
11205           /* Infer address prefix from the first memory operand.  */
11206           const reg_entry *addr_reg = i.base_reg;
11207
11208           if (addr_reg == NULL)
11209             addr_reg = i.index_reg;
11210
11211           if (addr_reg)
11212             {
11213               if (addr_reg->reg_type.bitfield.dword)
11214                 addr_mode = CODE_32BIT;
11215               else if (flag_code != CODE_64BIT
11216                        && addr_reg->reg_type.bitfield.word)
11217                 addr_mode = CODE_16BIT;
11218
11219               if (addr_mode != flag_code)
11220                 {
11221                   i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11222                   i.prefixes += 1;
11223                   /* Change the size of any displacement too.  At most one
11224                      of Disp16 or Disp32 is set.
11225                      FIXME.  There doesn't seem to be any real need for
11226                      separate Disp16 and Disp32 flags.  The same goes for
11227                      Imm16 and Imm32.  Removing them would probably clean
11228                      up the code quite a lot.  */
11229                   if (flag_code != CODE_64BIT
11230                       && (i.types[this_operand].bitfield.disp16
11231                           || i.types[this_operand].bitfield.disp32))
11232                     i.types[this_operand]
11233                       = operand_type_xor (i.types[this_operand], disp16_32);
11234                 }
11235             }
11236         }
11237 #endif
11238     }
11239
11240   return addr_mode;
11241 }
11242
11243 /* Make sure the memory operand we've been dealt is valid.
11244    Return 1 on success, 0 on a failure.  */
11245
11246 static int
11247 i386_index_check (const char *operand_string)
11248 {
11249   const char *kind = "base/index";
11250   enum flag_code addr_mode = i386_addressing_mode ();
11251   const insn_template *t = current_templates->start;
11252
11253   if (t->opcode_modifier.isstring
11254       && (current_templates->end[-1].opcode_modifier.isstring
11255           || i.mem_operands))
11256     {
11257       /* Memory operands of string insns are special in that they only allow
11258          a single register (rDI, rSI, or rBX) as their memory address.  */
11259       const reg_entry *expected_reg;
11260       static const char *di_si[][2] =
11261         {
11262           { "esi", "edi" },
11263           { "si", "di" },
11264           { "rsi", "rdi" }
11265         };
11266       static const char *bx[] = { "ebx", "bx", "rbx" };
11267
11268       kind = "string address";
11269
11270       if (t->opcode_modifier.prefixok == PrefixRep)
11271         {
11272           int es_op = current_templates->end[-1].opcode_modifier.isstring
11273                       - IS_STRING_ES_OP0;
11274           int op = 0;
11275
11276           if (!current_templates->end[-1].operand_types[0].bitfield.baseindex
11277               || ((!i.mem_operands != !intel_syntax)
11278                   && current_templates->end[-1].operand_types[1]
11279                      .bitfield.baseindex))
11280             op = 1;
11281           expected_reg
11282             = (const reg_entry *) str_hash_find (reg_hash,
11283                                                  di_si[addr_mode][op == es_op]);
11284         }
11285       else
11286         expected_reg
11287           = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
11288
11289       if (i.base_reg != expected_reg
11290           || i.index_reg
11291           || operand_type_check (i.types[this_operand], disp))
11292         {
11293           /* The second memory operand must have the same size as
11294              the first one.  */
11295           if (i.mem_operands
11296               && i.base_reg
11297               && !((addr_mode == CODE_64BIT
11298                     && i.base_reg->reg_type.bitfield.qword)
11299                    || (addr_mode == CODE_32BIT
11300                        ? i.base_reg->reg_type.bitfield.dword
11301                        : i.base_reg->reg_type.bitfield.word)))
11302             goto bad_address;
11303
11304           as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
11305                    operand_string,
11306                    intel_syntax ? '[' : '(',
11307                    register_prefix,
11308                    expected_reg->reg_name,
11309                    intel_syntax ? ']' : ')');
11310           return 1;
11311         }
11312       else
11313         return 1;
11314
11315     bad_address:
11316       as_bad (_("`%s' is not a valid %s expression"),
11317               operand_string, kind);
11318       return 0;
11319     }
11320   else
11321     {
11322       if (addr_mode != CODE_16BIT)
11323         {
11324           /* 32-bit/64-bit checks.  */
11325           if (i.disp_encoding == disp_encoding_16bit)
11326             {
11327             bad_disp:
11328               as_bad (_("invalid `%s' prefix"),
11329                       addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
11330               return 0;
11331             }
11332
11333           if ((i.base_reg
11334                && ((addr_mode == CODE_64BIT
11335                     ? !i.base_reg->reg_type.bitfield.qword
11336                     : !i.base_reg->reg_type.bitfield.dword)
11337                    || (i.index_reg && i.base_reg->reg_num == RegIP)
11338                    || i.base_reg->reg_num == RegIZ))
11339               || (i.index_reg
11340                   && !i.index_reg->reg_type.bitfield.xmmword
11341                   && !i.index_reg->reg_type.bitfield.ymmword
11342                   && !i.index_reg->reg_type.bitfield.zmmword
11343                   && ((addr_mode == CODE_64BIT
11344                        ? !i.index_reg->reg_type.bitfield.qword
11345                        : !i.index_reg->reg_type.bitfield.dword)
11346                       || !i.index_reg->reg_type.bitfield.baseindex)))
11347             goto bad_address;
11348
11349           /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
11350           if ((t->opcode_modifier.opcodeprefix == PREFIX_0XF3
11351                && t->opcode_modifier.opcodespace == SPACE_0F
11352                && t->base_opcode == 0x1b)
11353               || (t->opcode_modifier.opcodeprefix == PREFIX_NONE
11354                   && t->opcode_modifier.opcodespace == SPACE_0F
11355                   && (t->base_opcode & ~1) == 0x1a)
11356               || t->opcode_modifier.sib == SIBMEM)
11357             {
11358               /* They cannot use RIP-relative addressing. */
11359               if (i.base_reg && i.base_reg->reg_num == RegIP)
11360                 {
11361                   as_bad (_("`%s' cannot be used here"), operand_string);
11362                   return 0;
11363                 }
11364
11365               /* bndldx and bndstx ignore their scale factor. */
11366               if (t->opcode_modifier.opcodeprefix == PREFIX_NONE
11367                   && t->opcode_modifier.opcodespace == SPACE_0F
11368                   && (t->base_opcode & ~1) == 0x1a
11369                   && i.log2_scale_factor)
11370                 as_warn (_("register scaling is being ignored here"));
11371             }
11372         }
11373       else
11374         {
11375           /* 16-bit checks.  */
11376           if (i.disp_encoding == disp_encoding_32bit)
11377             goto bad_disp;
11378
11379           if ((i.base_reg
11380                && (!i.base_reg->reg_type.bitfield.word
11381                    || !i.base_reg->reg_type.bitfield.baseindex))
11382               || (i.index_reg
11383                   && (!i.index_reg->reg_type.bitfield.word
11384                       || !i.index_reg->reg_type.bitfield.baseindex
11385                       || !(i.base_reg
11386                            && i.base_reg->reg_num < 6
11387                            && i.index_reg->reg_num >= 6
11388                            && i.log2_scale_factor == 0))))
11389             goto bad_address;
11390         }
11391     }
11392   return 1;
11393 }
11394
11395 /* Handle vector immediates.  */
11396
11397 static int
11398 RC_SAE_immediate (const char *imm_start)
11399 {
11400   const char *pstr = imm_start;
11401
11402   if (*pstr != '{')
11403     return 0;
11404
11405   pstr = RC_SAE_specifier (pstr + 1);
11406   if (pstr == NULL)
11407     return 0;
11408
11409   if (*pstr++ != '}')
11410     {
11411       as_bad (_("Missing '}': '%s'"), imm_start);
11412       return 0;
11413     }
11414   /* RC/SAE immediate string should contain nothing more.  */;
11415   if (*pstr != 0)
11416     {
11417       as_bad (_("Junk after '}': '%s'"), imm_start);
11418       return 0;
11419     }
11420
11421   /* Internally this doesn't count as an operand.  */
11422   --i.operands;
11423
11424   return 1;
11425 }
11426
11427 /* Only string instructions can have a second memory operand, so
11428    reduce current_templates to just those if it contains any.  */
11429 static int
11430 maybe_adjust_templates (void)
11431 {
11432   const insn_template *t;
11433
11434   gas_assert (i.mem_operands == 1);
11435
11436   for (t = current_templates->start; t < current_templates->end; ++t)
11437     if (t->opcode_modifier.isstring)
11438       break;
11439
11440   if (t < current_templates->end)
11441     {
11442       static templates aux_templates;
11443       bool recheck;
11444
11445       aux_templates.start = t;
11446       for (; t < current_templates->end; ++t)
11447         if (!t->opcode_modifier.isstring)
11448           break;
11449       aux_templates.end = t;
11450
11451       /* Determine whether to re-check the first memory operand.  */
11452       recheck = (aux_templates.start != current_templates->start
11453                  || t != current_templates->end);
11454
11455       current_templates = &aux_templates;
11456
11457       if (recheck)
11458         {
11459           i.mem_operands = 0;
11460           if (i.memop1_string != NULL
11461               && i386_index_check (i.memop1_string) == 0)
11462             return 0;
11463           i.mem_operands = 1;
11464         }
11465     }
11466
11467   return 1;
11468 }
11469
11470 static INLINE bool starts_memory_operand (char c)
11471 {
11472   return ISDIGIT (c)
11473          || is_identifier_char (c)
11474          || strchr ("([\"+-!~", c);
11475 }
11476
11477 /* Parse OPERAND_STRING into the i386_insn structure I.  Returns zero
11478    on error.  */
11479
11480 static int
11481 i386_att_operand (char *operand_string)
11482 {
11483   const reg_entry *r;
11484   char *end_op;
11485   char *op_string = operand_string;
11486
11487   if (is_space_char (*op_string))
11488     ++op_string;
11489
11490   /* We check for an absolute prefix (differentiating,
11491      for example, 'jmp pc_relative_label' from 'jmp *absolute_label'.  */
11492   if (*op_string == ABSOLUTE_PREFIX)
11493     {
11494       ++op_string;
11495       if (is_space_char (*op_string))
11496         ++op_string;
11497       i.jumpabsolute = true;
11498     }
11499
11500   /* Check if operand is a register.  */
11501   if ((r = parse_register (op_string, &end_op)) != NULL)
11502     {
11503       i386_operand_type temp;
11504
11505       if (r == &bad_reg)
11506         return 0;
11507
11508       /* Check for a segment override by searching for ':' after a
11509          segment register.  */
11510       op_string = end_op;
11511       if (is_space_char (*op_string))
11512         ++op_string;
11513       if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
11514         {
11515           i.seg[i.mem_operands] = r;
11516
11517           /* Skip the ':' and whitespace.  */
11518           ++op_string;
11519           if (is_space_char (*op_string))
11520             ++op_string;
11521
11522           /* Handle case of %es:*foo.  */
11523           if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX)
11524             {
11525               ++op_string;
11526               if (is_space_char (*op_string))
11527                 ++op_string;
11528               i.jumpabsolute = true;
11529             }
11530
11531           if (!starts_memory_operand (*op_string))
11532             {
11533               as_bad (_("bad memory operand `%s'"), op_string);
11534               return 0;
11535             }
11536           goto do_memory_reference;
11537         }
11538
11539       /* Handle vector operations.  */
11540       if (*op_string == '{')
11541         {
11542           op_string = check_VecOperations (op_string);
11543           if (op_string == NULL)
11544             return 0;
11545         }
11546
11547       if (*op_string)
11548         {
11549           as_bad (_("junk `%s' after register"), op_string);
11550           return 0;
11551         }
11552       temp = r->reg_type;
11553       temp.bitfield.baseindex = 0;
11554       i.types[this_operand] = operand_type_or (i.types[this_operand],
11555                                                temp);
11556       i.types[this_operand].bitfield.unspecified = 0;
11557       i.op[this_operand].regs = r;
11558       i.reg_operands++;
11559
11560       /* A GPR may follow an RC or SAE immediate only if a (vector) register
11561          operand was also present earlier on.  */
11562       if (i.rounding.type != rc_none && temp.bitfield.class == Reg
11563           && i.reg_operands == 1)
11564         {
11565           unsigned int j;
11566
11567           for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
11568             if (i.rounding.type == RC_NamesTable[j].type)
11569               break;
11570           as_bad (_("`%s': misplaced `{%s}'"),
11571                   current_templates->start->name, RC_NamesTable[j].name);
11572           return 0;
11573         }
11574     }
11575   else if (*op_string == REGISTER_PREFIX)
11576     {
11577       as_bad (_("bad register name `%s'"), op_string);
11578       return 0;
11579     }
11580   else if (*op_string == IMMEDIATE_PREFIX)
11581     {
11582       ++op_string;
11583       if (i.jumpabsolute)
11584         {
11585           as_bad (_("immediate operand illegal with absolute jump"));
11586           return 0;
11587         }
11588       if (!i386_immediate (op_string))
11589         return 0;
11590       if (i.rounding.type != rc_none)
11591         {
11592           as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
11593                   current_templates->start->name);
11594           return 0;
11595         }
11596     }
11597   else if (RC_SAE_immediate (operand_string))
11598     {
11599       /* If it is a RC or SAE immediate, do the necessary placement check:
11600          Only another immediate or a GPR may precede it.  */
11601       if (i.mem_operands || i.reg_operands + i.imm_operands > 1
11602           || (i.reg_operands == 1
11603               && i.op[0].regs->reg_type.bitfield.class != Reg))
11604         {
11605           as_bad (_("`%s': misplaced `%s'"),
11606                   current_templates->start->name, operand_string);
11607           return 0;
11608         }
11609     }
11610   else if (starts_memory_operand (*op_string))
11611     {
11612       /* This is a memory reference of some sort.  */
11613       char *base_string;
11614
11615       /* Start and end of displacement string expression (if found).  */
11616       char *displacement_string_start;
11617       char *displacement_string_end;
11618
11619     do_memory_reference:
11620       if (i.mem_operands == 1 && !maybe_adjust_templates ())
11621         return 0;
11622       if ((i.mem_operands == 1
11623            && !current_templates->start->opcode_modifier.isstring)
11624           || i.mem_operands == 2)
11625         {
11626           as_bad (_("too many memory references for `%s'"),
11627                   current_templates->start->name);
11628           return 0;
11629         }
11630
11631       /* Check for base index form.  We detect the base index form by
11632          looking for an ')' at the end of the operand, searching
11633          for the '(' matching it, and finding a REGISTER_PREFIX or ','
11634          after the '('.  */
11635       base_string = op_string + strlen (op_string);
11636
11637       /* Handle vector operations.  */
11638       --base_string;
11639       if (is_space_char (*base_string))
11640         --base_string;
11641
11642       if (*base_string == '}')
11643         {
11644           char *vop_start = NULL;
11645
11646           while (base_string-- > op_string)
11647             {
11648               if (*base_string == '"')
11649                 break;
11650               if (*base_string != '{')
11651                 continue;
11652
11653               vop_start = base_string;
11654
11655               --base_string;
11656               if (is_space_char (*base_string))
11657                 --base_string;
11658
11659               if (*base_string != '}')
11660                 break;
11661
11662               vop_start = NULL;
11663             }
11664
11665           if (!vop_start)
11666             {
11667               as_bad (_("unbalanced figure braces"));
11668               return 0;
11669             }
11670
11671           if (check_VecOperations (vop_start) == NULL)
11672             return 0;
11673         }
11674
11675       /* If we only have a displacement, set-up for it to be parsed later.  */
11676       displacement_string_start = op_string;
11677       displacement_string_end = base_string + 1;
11678
11679       if (*base_string == ')')
11680         {
11681           char *temp_string;
11682           unsigned int parens_not_balanced = 0;
11683           bool in_quotes = false;
11684
11685           /* We've already checked that the number of left & right ()'s are
11686              equal, and that there's a matching set of double quotes.  */
11687           end_op = base_string;
11688           for (temp_string = op_string; temp_string < end_op; temp_string++)
11689             {
11690               if (*temp_string == '\\' && temp_string[1] == '"')
11691                 ++temp_string;
11692               else if (*temp_string == '"')
11693                 in_quotes = !in_quotes;
11694               else if (!in_quotes)
11695                 {
11696                   if (*temp_string == '(' && !parens_not_balanced++)
11697                     base_string = temp_string;
11698                   if (*temp_string == ')')
11699                     --parens_not_balanced;
11700                 }
11701             }
11702
11703           temp_string = base_string;
11704
11705           /* Skip past '(' and whitespace.  */
11706           gas_assert (*base_string == '(');
11707           ++base_string;
11708           if (is_space_char (*base_string))
11709             ++base_string;
11710
11711           if (*base_string == ','
11712               || ((i.base_reg = parse_register (base_string, &end_op))
11713                   != NULL))
11714             {
11715               displacement_string_end = temp_string;
11716
11717               i.types[this_operand].bitfield.baseindex = 1;
11718
11719               if (i.base_reg)
11720                 {
11721                   if (i.base_reg == &bad_reg)
11722                     return 0;
11723                   base_string = end_op;
11724                   if (is_space_char (*base_string))
11725                     ++base_string;
11726                 }
11727
11728               /* There may be an index reg or scale factor here.  */
11729               if (*base_string == ',')
11730                 {
11731                   ++base_string;
11732                   if (is_space_char (*base_string))
11733                     ++base_string;
11734
11735                   if ((i.index_reg = parse_register (base_string, &end_op))
11736                       != NULL)
11737                     {
11738                       if (i.index_reg == &bad_reg)
11739                         return 0;
11740                       base_string = end_op;
11741                       if (is_space_char (*base_string))
11742                         ++base_string;
11743                       if (*base_string == ',')
11744                         {
11745                           ++base_string;
11746                           if (is_space_char (*base_string))
11747                             ++base_string;
11748                         }
11749                       else if (*base_string != ')')
11750                         {
11751                           as_bad (_("expecting `,' or `)' "
11752                                     "after index register in `%s'"),
11753                                   operand_string);
11754                           return 0;
11755                         }
11756                     }
11757                   else if (*base_string == REGISTER_PREFIX)
11758                     {
11759                       end_op = strchr (base_string, ',');
11760                       if (end_op)
11761                         *end_op = '\0';
11762                       as_bad (_("bad register name `%s'"), base_string);
11763                       return 0;
11764                     }
11765
11766                   /* Check for scale factor.  */
11767                   if (*base_string != ')')
11768                     {
11769                       char *end_scale = i386_scale (base_string);
11770
11771                       if (!end_scale)
11772                         return 0;
11773
11774                       base_string = end_scale;
11775                       if (is_space_char (*base_string))
11776                         ++base_string;
11777                       if (*base_string != ')')
11778                         {
11779                           as_bad (_("expecting `)' "
11780                                     "after scale factor in `%s'"),
11781                                   operand_string);
11782                           return 0;
11783                         }
11784                     }
11785                   else if (!i.index_reg)
11786                     {
11787                       as_bad (_("expecting index register or scale factor "
11788                                 "after `,'; got '%c'"),
11789                               *base_string);
11790                       return 0;
11791                     }
11792                 }
11793               else if (*base_string != ')')
11794                 {
11795                   as_bad (_("expecting `,' or `)' "
11796                             "after base register in `%s'"),
11797                           operand_string);
11798                   return 0;
11799                 }
11800             }
11801           else if (*base_string == REGISTER_PREFIX)
11802             {
11803               end_op = strchr (base_string, ',');
11804               if (end_op)
11805                 *end_op = '\0';
11806               as_bad (_("bad register name `%s'"), base_string);
11807               return 0;
11808             }
11809         }
11810
11811       /* If there's an expression beginning the operand, parse it,
11812          assuming displacement_string_start and
11813          displacement_string_end are meaningful.  */
11814       if (displacement_string_start != displacement_string_end)
11815         {
11816           if (!i386_displacement (displacement_string_start,
11817                                   displacement_string_end))
11818             return 0;
11819         }
11820
11821       /* Special case for (%dx) while doing input/output op.  */
11822       if (i.base_reg
11823           && i.base_reg->reg_type.bitfield.instance == RegD
11824           && i.base_reg->reg_type.bitfield.word
11825           && i.index_reg == 0
11826           && i.log2_scale_factor == 0
11827           && i.seg[i.mem_operands] == 0
11828           && !operand_type_check (i.types[this_operand], disp))
11829         {
11830           i.types[this_operand] = i.base_reg->reg_type;
11831           i.input_output_operand = true;
11832           return 1;
11833         }
11834
11835       if (i386_index_check (operand_string) == 0)
11836         return 0;
11837       i.flags[this_operand] |= Operand_Mem;
11838       if (i.mem_operands == 0)
11839         i.memop1_string = xstrdup (operand_string);
11840       i.mem_operands++;
11841     }
11842   else
11843     {
11844       /* It's not a memory operand; argh!  */
11845       as_bad (_("invalid char %s beginning operand %d `%s'"),
11846               output_invalid (*op_string),
11847               this_operand + 1,
11848               op_string);
11849       return 0;
11850     }
11851   return 1;                     /* Normal return.  */
11852 }
11853 \f
11854 /* Calculate the maximum variable size (i.e., excluding fr_fix)
11855    that an rs_machine_dependent frag may reach.  */
11856
11857 unsigned int
11858 i386_frag_max_var (fragS *frag)
11859 {
11860   /* The only relaxable frags are for jumps.
11861      Unconditional jumps can grow by 4 bytes and others by 5 bytes.  */
11862   gas_assert (frag->fr_type == rs_machine_dependent);
11863   return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
11864 }
11865
11866 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11867 static int
11868 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
11869 {
11870   /* STT_GNU_IFUNC symbol must go through PLT.  */
11871   if ((symbol_get_bfdsym (fr_symbol)->flags
11872        & BSF_GNU_INDIRECT_FUNCTION) != 0)
11873     return 0;
11874
11875   if (!S_IS_EXTERNAL (fr_symbol))
11876     /* Symbol may be weak or local.  */
11877     return !S_IS_WEAK (fr_symbol);
11878
11879   /* Global symbols with non-default visibility can't be preempted. */
11880   if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
11881     return 1;
11882
11883   if (fr_var != NO_RELOC)
11884     switch ((enum bfd_reloc_code_real) fr_var)
11885       {
11886       case BFD_RELOC_386_PLT32:
11887       case BFD_RELOC_X86_64_PLT32:
11888         /* Symbol with PLT relocation may be preempted. */
11889         return 0;
11890       default:
11891         abort ();
11892       }
11893
11894   /* Global symbols with default visibility in a shared library may be
11895      preempted by another definition.  */
11896   return !shared;
11897 }
11898 #endif
11899
11900 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
11901    Note also work for Skylake and Cascadelake.
11902 ---------------------------------------------------------------------
11903 |   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
11904 | ------  | ----------- | ------- | -------- |
11905 |   Jo    |      N      |    N    |     Y    |
11906 |   Jno   |      N      |    N    |     Y    |
11907 |  Jc/Jb  |      Y      |    N    |     Y    |
11908 | Jae/Jnb |      Y      |    N    |     Y    |
11909 |  Je/Jz  |      Y      |    Y    |     Y    |
11910 | Jne/Jnz |      Y      |    Y    |     Y    |
11911 | Jna/Jbe |      Y      |    N    |     Y    |
11912 | Ja/Jnbe |      Y      |    N    |     Y    |
11913 |   Js    |      N      |    N    |     Y    |
11914 |   Jns   |      N      |    N    |     Y    |
11915 |  Jp/Jpe |      N      |    N    |     Y    |
11916 | Jnp/Jpo |      N      |    N    |     Y    |
11917 | Jl/Jnge |      Y      |    Y    |     Y    |
11918 | Jge/Jnl |      Y      |    Y    |     Y    |
11919 | Jle/Jng |      Y      |    Y    |     Y    |
11920 | Jg/Jnle |      Y      |    Y    |     Y    |
11921 ---------------------------------------------------------------------  */
11922 static int
11923 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
11924 {
11925   if (mf_cmp == mf_cmp_alu_cmp)
11926     return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
11927             || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
11928   if (mf_cmp == mf_cmp_incdec)
11929     return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
11930             || mf_jcc == mf_jcc_jle);
11931   if (mf_cmp == mf_cmp_test_and)
11932     return 1;
11933   return 0;
11934 }
11935
11936 /* Return the next non-empty frag.  */
11937
11938 static fragS *
11939 i386_next_non_empty_frag (fragS *fragP)
11940 {
11941   /* There may be a frag with a ".fill 0" when there is no room in
11942      the current frag for frag_grow in output_insn.  */
11943   for (fragP = fragP->fr_next;
11944        (fragP != NULL
11945         && fragP->fr_type == rs_fill
11946         && fragP->fr_fix == 0);
11947        fragP = fragP->fr_next)
11948     ;
11949   return fragP;
11950 }
11951
11952 /* Return the next jcc frag after BRANCH_PADDING.  */
11953
11954 static fragS *
11955 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
11956 {
11957   fragS *branch_fragP;
11958   if (!pad_fragP)
11959     return NULL;
11960
11961   if (pad_fragP->fr_type == rs_machine_dependent
11962       && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
11963           == BRANCH_PADDING))
11964     {
11965       branch_fragP = i386_next_non_empty_frag (pad_fragP);
11966       if (branch_fragP->fr_type != rs_machine_dependent)
11967         return NULL;
11968       if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
11969           && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
11970                                    pad_fragP->tc_frag_data.mf_type))
11971         return branch_fragP;
11972     }
11973
11974   return NULL;
11975 }
11976
11977 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags.  */
11978
11979 static void
11980 i386_classify_machine_dependent_frag (fragS *fragP)
11981 {
11982   fragS *cmp_fragP;
11983   fragS *pad_fragP;
11984   fragS *branch_fragP;
11985   fragS *next_fragP;
11986   unsigned int max_prefix_length;
11987
11988   if (fragP->tc_frag_data.classified)
11989     return;
11990
11991   /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING.  Convert
11992      FUSED_JCC_PADDING and merge BRANCH_PADDING.  */
11993   for (next_fragP = fragP;
11994        next_fragP != NULL;
11995        next_fragP = next_fragP->fr_next)
11996     {
11997       next_fragP->tc_frag_data.classified = 1;
11998       if (next_fragP->fr_type == rs_machine_dependent)
11999         switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
12000           {
12001           case BRANCH_PADDING:
12002             /* The BRANCH_PADDING frag must be followed by a branch
12003                frag.  */
12004             branch_fragP = i386_next_non_empty_frag (next_fragP);
12005             next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12006             break;
12007           case FUSED_JCC_PADDING:
12008             /* Check if this is a fused jcc:
12009                FUSED_JCC_PADDING
12010                CMP like instruction
12011                BRANCH_PADDING
12012                COND_JUMP
12013                */
12014             cmp_fragP = i386_next_non_empty_frag (next_fragP);
12015             pad_fragP = i386_next_non_empty_frag (cmp_fragP);
12016             branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
12017             if (branch_fragP)
12018               {
12019                 /* The BRANCH_PADDING frag is merged with the
12020                    FUSED_JCC_PADDING frag.  */
12021                 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12022                 /* CMP like instruction size.  */
12023                 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
12024                 frag_wane (pad_fragP);
12025                 /* Skip to branch_fragP.  */
12026                 next_fragP = branch_fragP;
12027               }
12028             else if (next_fragP->tc_frag_data.max_prefix_length)
12029               {
12030                 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
12031                    a fused jcc.  */
12032                 next_fragP->fr_subtype
12033                   = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
12034                 next_fragP->tc_frag_data.max_bytes
12035                   = next_fragP->tc_frag_data.max_prefix_length;
12036                 /* This will be updated in the BRANCH_PREFIX scan.  */
12037                 next_fragP->tc_frag_data.max_prefix_length = 0;
12038               }
12039             else
12040               frag_wane (next_fragP);
12041             break;
12042           }
12043     }
12044
12045   /* Stop if there is no BRANCH_PREFIX.  */
12046   if (!align_branch_prefix_size)
12047     return;
12048
12049   /* Scan for BRANCH_PREFIX.  */
12050   for (; fragP != NULL; fragP = fragP->fr_next)
12051     {
12052       if (fragP->fr_type != rs_machine_dependent
12053           || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12054               != BRANCH_PREFIX))
12055         continue;
12056
12057       /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
12058          COND_JUMP_PREFIX.  */
12059       max_prefix_length = 0;
12060       for (next_fragP = fragP;
12061            next_fragP != NULL;
12062            next_fragP = next_fragP->fr_next)
12063         {
12064           if (next_fragP->fr_type == rs_fill)
12065             /* Skip rs_fill frags.  */
12066             continue;
12067           else if (next_fragP->fr_type != rs_machine_dependent)
12068             /* Stop for all other frags.  */
12069             break;
12070
12071           /* rs_machine_dependent frags.  */
12072           if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12073               == BRANCH_PREFIX)
12074             {
12075               /* Count BRANCH_PREFIX frags.  */
12076               if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
12077                 {
12078                   max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
12079                   frag_wane (next_fragP);
12080                 }
12081               else
12082                 max_prefix_length
12083                   += next_fragP->tc_frag_data.max_bytes;
12084             }
12085           else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12086                     == BRANCH_PADDING)
12087                    || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12088                        == FUSED_JCC_PADDING))
12089             {
12090               /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING.  */
12091               fragP->tc_frag_data.u.padding_fragP = next_fragP;
12092               break;
12093             }
12094           else
12095             /* Stop for other rs_machine_dependent frags.  */
12096             break;
12097         }
12098
12099       fragP->tc_frag_data.max_prefix_length = max_prefix_length;
12100
12101       /* Skip to the next frag.  */
12102       fragP = next_fragP;
12103     }
12104 }
12105
12106 /* Compute padding size for
12107
12108         FUSED_JCC_PADDING
12109         CMP like instruction
12110         BRANCH_PADDING
12111         COND_JUMP/UNCOND_JUMP
12112
12113    or
12114
12115         BRANCH_PADDING
12116         COND_JUMP/UNCOND_JUMP
12117  */
12118
12119 static int
12120 i386_branch_padding_size (fragS *fragP, offsetT address)
12121 {
12122   unsigned int offset, size, padding_size;
12123   fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
12124
12125   /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag.  */
12126   if (!address)
12127     address = fragP->fr_address;
12128   address += fragP->fr_fix;
12129
12130   /* CMP like instrunction size.  */
12131   size = fragP->tc_frag_data.cmp_size;
12132
12133   /* The base size of the branch frag.  */
12134   size += branch_fragP->fr_fix;
12135
12136   /* Add opcode and displacement bytes for the rs_machine_dependent
12137      branch frag.  */
12138   if (branch_fragP->fr_type == rs_machine_dependent)
12139     size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
12140
12141   /* Check if branch is within boundary and doesn't end at the last
12142      byte.  */
12143   offset = address & ((1U << align_branch_power) - 1);
12144   if ((offset + size) >= (1U << align_branch_power))
12145     /* Padding needed to avoid crossing boundary.  */
12146     padding_size = (1U << align_branch_power) - offset;
12147   else
12148     /* No padding needed.  */
12149     padding_size = 0;
12150
12151   /* The return value may be saved in tc_frag_data.length which is
12152      unsigned byte.  */
12153   if (!fits_in_unsigned_byte (padding_size))
12154     abort ();
12155
12156   return padding_size;
12157 }
12158
12159 /* i386_generic_table_relax_frag()
12160
12161    Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
12162    grow/shrink padding to align branch frags.  Hand others to
12163    relax_frag().  */
12164
12165 long
12166 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
12167 {
12168   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12169       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12170     {
12171       long padding_size = i386_branch_padding_size (fragP, 0);
12172       long grow = padding_size - fragP->tc_frag_data.length;
12173
12174       /* When the BRANCH_PREFIX frag is used, the computed address
12175          must match the actual address and there should be no padding.  */
12176       if (fragP->tc_frag_data.padding_address
12177           && (fragP->tc_frag_data.padding_address != fragP->fr_address
12178               || padding_size))
12179         abort ();
12180
12181       /* Update the padding size.  */
12182       if (grow)
12183         fragP->tc_frag_data.length = padding_size;
12184
12185       return grow;
12186     }
12187   else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12188     {
12189       fragS *padding_fragP, *next_fragP;
12190       long padding_size, left_size, last_size;
12191
12192       padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12193       if (!padding_fragP)
12194         /* Use the padding set by the leading BRANCH_PREFIX frag.  */
12195         return (fragP->tc_frag_data.length
12196                 - fragP->tc_frag_data.last_length);
12197
12198       /* Compute the relative address of the padding frag in the very
12199         first time where the BRANCH_PREFIX frag sizes are zero.  */
12200       if (!fragP->tc_frag_data.padding_address)
12201         fragP->tc_frag_data.padding_address
12202           = padding_fragP->fr_address - (fragP->fr_address - stretch);
12203
12204       /* First update the last length from the previous interation.  */
12205       left_size = fragP->tc_frag_data.prefix_length;
12206       for (next_fragP = fragP;
12207            next_fragP != padding_fragP;
12208            next_fragP = next_fragP->fr_next)
12209         if (next_fragP->fr_type == rs_machine_dependent
12210             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12211                 == BRANCH_PREFIX))
12212           {
12213             if (left_size)
12214               {
12215                 int max = next_fragP->tc_frag_data.max_bytes;
12216                 if (max)
12217                   {
12218                     int size;
12219                     if (max > left_size)
12220                       size = left_size;
12221                     else
12222                       size = max;
12223                     left_size -= size;
12224                     next_fragP->tc_frag_data.last_length = size;
12225                   }
12226               }
12227             else
12228               next_fragP->tc_frag_data.last_length = 0;
12229           }
12230
12231       /* Check the padding size for the padding frag.  */
12232       padding_size = i386_branch_padding_size
12233         (padding_fragP, (fragP->fr_address
12234                          + fragP->tc_frag_data.padding_address));
12235
12236       last_size = fragP->tc_frag_data.prefix_length;
12237       /* Check if there is change from the last interation.  */
12238       if (padding_size == last_size)
12239         {
12240           /* Update the expected address of the padding frag.  */
12241           padding_fragP->tc_frag_data.padding_address
12242             = (fragP->fr_address + padding_size
12243                + fragP->tc_frag_data.padding_address);
12244           return 0;
12245         }
12246
12247       if (padding_size > fragP->tc_frag_data.max_prefix_length)
12248         {
12249           /* No padding if there is no sufficient room.  Clear the
12250              expected address of the padding frag.  */
12251           padding_fragP->tc_frag_data.padding_address = 0;
12252           padding_size = 0;
12253         }
12254       else
12255         /* Store the expected address of the padding frag.  */
12256         padding_fragP->tc_frag_data.padding_address
12257           = (fragP->fr_address + padding_size
12258              + fragP->tc_frag_data.padding_address);
12259
12260       fragP->tc_frag_data.prefix_length = padding_size;
12261
12262       /* Update the length for the current interation.  */
12263       left_size = padding_size;
12264       for (next_fragP = fragP;
12265            next_fragP != padding_fragP;
12266            next_fragP = next_fragP->fr_next)
12267         if (next_fragP->fr_type == rs_machine_dependent
12268             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12269                 == BRANCH_PREFIX))
12270           {
12271             if (left_size)
12272               {
12273                 int max = next_fragP->tc_frag_data.max_bytes;
12274                 if (max)
12275                   {
12276                     int size;
12277                     if (max > left_size)
12278                       size = left_size;
12279                     else
12280                       size = max;
12281                     left_size -= size;
12282                     next_fragP->tc_frag_data.length = size;
12283                   }
12284               }
12285             else
12286               next_fragP->tc_frag_data.length = 0;
12287           }
12288
12289       return (fragP->tc_frag_data.length
12290               - fragP->tc_frag_data.last_length);
12291     }
12292   return relax_frag (segment, fragP, stretch);
12293 }
12294
12295 /* md_estimate_size_before_relax()
12296
12297    Called just before relax() for rs_machine_dependent frags.  The x86
12298    assembler uses these frags to handle variable size jump
12299    instructions.
12300
12301    Any symbol that is now undefined will not become defined.
12302    Return the correct fr_subtype in the frag.
12303    Return the initial "guess for variable size of frag" to caller.
12304    The guess is actually the growth beyond the fixed part.  Whatever
12305    we do to grow the fixed or variable part contributes to our
12306    returned value.  */
12307
12308 int
12309 md_estimate_size_before_relax (fragS *fragP, segT segment)
12310 {
12311   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12312       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
12313       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12314     {
12315       i386_classify_machine_dependent_frag (fragP);
12316       return fragP->tc_frag_data.length;
12317     }
12318
12319   /* We've already got fragP->fr_subtype right;  all we have to do is
12320      check for un-relaxable symbols.  On an ELF system, we can't relax
12321      an externally visible symbol, because it may be overridden by a
12322      shared library.  */
12323   if (S_GET_SEGMENT (fragP->fr_symbol) != segment
12324 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12325       || (IS_ELF
12326           && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
12327                                                 fragP->fr_var))
12328 #endif
12329 #if defined (OBJ_COFF) && defined (TE_PE)
12330       || (OUTPUT_FLAVOR == bfd_target_coff_flavour
12331           && S_IS_WEAK (fragP->fr_symbol))
12332 #endif
12333       )
12334     {
12335       /* Symbol is undefined in this segment, or we need to keep a
12336          reloc so that weak symbols can be overridden.  */
12337       int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
12338       enum bfd_reloc_code_real reloc_type;
12339       unsigned char *opcode;
12340       int old_fr_fix;
12341       fixS *fixP = NULL;
12342
12343       if (fragP->fr_var != NO_RELOC)
12344         reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
12345       else if (size == 2)
12346         reloc_type = BFD_RELOC_16_PCREL;
12347 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12348       else if (fragP->tc_frag_data.code64 && fragP->fr_offset == 0
12349                && need_plt32_p (fragP->fr_symbol))
12350         reloc_type = BFD_RELOC_X86_64_PLT32;
12351 #endif
12352       else
12353         reloc_type = BFD_RELOC_32_PCREL;
12354
12355       old_fr_fix = fragP->fr_fix;
12356       opcode = (unsigned char *) fragP->fr_opcode;
12357
12358       switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
12359         {
12360         case UNCOND_JUMP:
12361           /* Make jmp (0xeb) a (d)word displacement jump.  */
12362           opcode[0] = 0xe9;
12363           fragP->fr_fix += size;
12364           fixP = fix_new (fragP, old_fr_fix, size,
12365                           fragP->fr_symbol,
12366                           fragP->fr_offset, 1,
12367                           reloc_type);
12368           break;
12369
12370         case COND_JUMP86:
12371           if (size == 2
12372               && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
12373             {
12374               /* Negate the condition, and branch past an
12375                  unconditional jump.  */
12376               opcode[0] ^= 1;
12377               opcode[1] = 3;
12378               /* Insert an unconditional jump.  */
12379               opcode[2] = 0xe9;
12380               /* We added two extra opcode bytes, and have a two byte
12381                  offset.  */
12382               fragP->fr_fix += 2 + 2;
12383               fix_new (fragP, old_fr_fix + 2, 2,
12384                        fragP->fr_symbol,
12385                        fragP->fr_offset, 1,
12386                        reloc_type);
12387               break;
12388             }
12389           /* Fall through.  */
12390
12391         case COND_JUMP:
12392           if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
12393             {
12394               fragP->fr_fix += 1;
12395               fixP = fix_new (fragP, old_fr_fix, 1,
12396                               fragP->fr_symbol,
12397                               fragP->fr_offset, 1,
12398                               BFD_RELOC_8_PCREL);
12399               fixP->fx_signed = 1;
12400               break;
12401             }
12402
12403           /* This changes the byte-displacement jump 0x7N
12404              to the (d)word-displacement jump 0x0f,0x8N.  */
12405           opcode[1] = opcode[0] + 0x10;
12406           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12407           /* We've added an opcode byte.  */
12408           fragP->fr_fix += 1 + size;
12409           fixP = fix_new (fragP, old_fr_fix + 1, size,
12410                           fragP->fr_symbol,
12411                           fragP->fr_offset, 1,
12412                           reloc_type);
12413           break;
12414
12415         default:
12416           BAD_CASE (fragP->fr_subtype);
12417           break;
12418         }
12419
12420       /* All jumps handled here are signed, but don't unconditionally use a
12421          signed limit check for 32 and 16 bit jumps as we want to allow wrap
12422          around at 4G (outside of 64-bit mode) and 64k.  */
12423       if (size == 4 && flag_code == CODE_64BIT)
12424         fixP->fx_signed = 1;
12425
12426       frag_wane (fragP);
12427       return fragP->fr_fix - old_fr_fix;
12428     }
12429
12430   /* Guess size depending on current relax state.  Initially the relax
12431      state will correspond to a short jump and we return 1, because
12432      the variable part of the frag (the branch offset) is one byte
12433      long.  However, we can relax a section more than once and in that
12434      case we must either set fr_subtype back to the unrelaxed state,
12435      or return the value for the appropriate branch.  */
12436   return md_relax_table[fragP->fr_subtype].rlx_length;
12437 }
12438
12439 /* Called after relax() is finished.
12440
12441    In:  Address of frag.
12442         fr_type == rs_machine_dependent.
12443         fr_subtype is what the address relaxed to.
12444
12445    Out: Any fixSs and constants are set up.
12446         Caller will turn frag into a ".space 0".  */
12447
12448 void
12449 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
12450                  fragS *fragP)
12451 {
12452   unsigned char *opcode;
12453   unsigned char *where_to_put_displacement = NULL;
12454   offsetT target_address;
12455   offsetT opcode_address;
12456   unsigned int extension = 0;
12457   offsetT displacement_from_opcode_start;
12458
12459   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12460       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
12461       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12462     {
12463       /* Generate nop padding.  */
12464       unsigned int size = fragP->tc_frag_data.length;
12465       if (size)
12466         {
12467           if (size > fragP->tc_frag_data.max_bytes)
12468             abort ();
12469
12470           if (flag_debug)
12471             {
12472               const char *msg;
12473               const char *branch = "branch";
12474               const char *prefix = "";
12475               fragS *padding_fragP;
12476               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12477                   == BRANCH_PREFIX)
12478                 {
12479                   padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12480                   switch (fragP->tc_frag_data.default_prefix)
12481                     {
12482                     default:
12483                       abort ();
12484                       break;
12485                     case CS_PREFIX_OPCODE:
12486                       prefix = " cs";
12487                       break;
12488                     case DS_PREFIX_OPCODE:
12489                       prefix = " ds";
12490                       break;
12491                     case ES_PREFIX_OPCODE:
12492                       prefix = " es";
12493                       break;
12494                     case FS_PREFIX_OPCODE:
12495                       prefix = " fs";
12496                       break;
12497                     case GS_PREFIX_OPCODE:
12498                       prefix = " gs";
12499                       break;
12500                     case SS_PREFIX_OPCODE:
12501                       prefix = " ss";
12502                       break;
12503                     }
12504                   if (padding_fragP)
12505                     msg = _("%s:%u: add %d%s at 0x%llx to align "
12506                             "%s within %d-byte boundary\n");
12507                   else
12508                     msg = _("%s:%u: add additional %d%s at 0x%llx to "
12509                             "align %s within %d-byte boundary\n");
12510                 }
12511               else
12512                 {
12513                   padding_fragP = fragP;
12514                   msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
12515                           "%s within %d-byte boundary\n");
12516                 }
12517
12518               if (padding_fragP)
12519                 switch (padding_fragP->tc_frag_data.branch_type)
12520                   {
12521                   case align_branch_jcc:
12522                     branch = "jcc";
12523                     break;
12524                   case align_branch_fused:
12525                     branch = "fused jcc";
12526                     break;
12527                   case align_branch_jmp:
12528                     branch = "jmp";
12529                     break;
12530                   case align_branch_call:
12531                     branch = "call";
12532                     break;
12533                   case align_branch_indirect:
12534                     branch = "indiret branch";
12535                     break;
12536                   case align_branch_ret:
12537                     branch = "ret";
12538                     break;
12539                   default:
12540                     break;
12541                   }
12542
12543               fprintf (stdout, msg,
12544                        fragP->fr_file, fragP->fr_line, size, prefix,
12545                        (long long) fragP->fr_address, branch,
12546                        1 << align_branch_power);
12547             }
12548           if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12549             memset (fragP->fr_opcode,
12550                     fragP->tc_frag_data.default_prefix, size);
12551           else
12552             i386_generate_nops (fragP, (char *) fragP->fr_opcode,
12553                                 size, 0);
12554           fragP->fr_fix += size;
12555         }
12556       return;
12557     }
12558
12559   opcode = (unsigned char *) fragP->fr_opcode;
12560
12561   /* Address we want to reach in file space.  */
12562   target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
12563
12564   /* Address opcode resides at in file space.  */
12565   opcode_address = fragP->fr_address + fragP->fr_fix;
12566
12567   /* Displacement from opcode start to fill into instruction.  */
12568   displacement_from_opcode_start = target_address - opcode_address;
12569
12570   if ((fragP->fr_subtype & BIG) == 0)
12571     {
12572       /* Don't have to change opcode.  */
12573       extension = 1;            /* 1 opcode + 1 displacement  */
12574       where_to_put_displacement = &opcode[1];
12575     }
12576   else
12577     {
12578       if (no_cond_jump_promotion
12579           && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
12580         as_warn_where (fragP->fr_file, fragP->fr_line,
12581                        _("long jump required"));
12582
12583       switch (fragP->fr_subtype)
12584         {
12585         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
12586           extension = 4;                /* 1 opcode + 4 displacement  */
12587           opcode[0] = 0xe9;
12588           where_to_put_displacement = &opcode[1];
12589           break;
12590
12591         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
12592           extension = 2;                /* 1 opcode + 2 displacement  */
12593           opcode[0] = 0xe9;
12594           where_to_put_displacement = &opcode[1];
12595           break;
12596
12597         case ENCODE_RELAX_STATE (COND_JUMP, BIG):
12598         case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
12599           extension = 5;                /* 2 opcode + 4 displacement  */
12600           opcode[1] = opcode[0] + 0x10;
12601           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12602           where_to_put_displacement = &opcode[2];
12603           break;
12604
12605         case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
12606           extension = 3;                /* 2 opcode + 2 displacement  */
12607           opcode[1] = opcode[0] + 0x10;
12608           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12609           where_to_put_displacement = &opcode[2];
12610           break;
12611
12612         case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
12613           extension = 4;
12614           opcode[0] ^= 1;
12615           opcode[1] = 3;
12616           opcode[2] = 0xe9;
12617           where_to_put_displacement = &opcode[3];
12618           break;
12619
12620         default:
12621           BAD_CASE (fragP->fr_subtype);
12622           break;
12623         }
12624     }
12625
12626   /* If size if less then four we are sure that the operand fits,
12627      but if it's 4, then it could be that the displacement is larger
12628      then -/+ 2GB.  */
12629   if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
12630       && object_64bit
12631       && ((addressT) (displacement_from_opcode_start - extension
12632                       + ((addressT) 1 << 31))
12633           > (((addressT) 2 << 31) - 1)))
12634     {
12635       as_bad_where (fragP->fr_file, fragP->fr_line,
12636                     _("jump target out of range"));
12637       /* Make us emit 0.  */
12638       displacement_from_opcode_start = extension;
12639     }
12640   /* Now put displacement after opcode.  */
12641   md_number_to_chars ((char *) where_to_put_displacement,
12642                       (valueT) (displacement_from_opcode_start - extension),
12643                       DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
12644   fragP->fr_fix += extension;
12645 }
12646 \f
12647 /* Apply a fixup (fixP) to segment data, once it has been determined
12648    by our caller that we have all the info we need to fix it up.
12649
12650    Parameter valP is the pointer to the value of the bits.
12651
12652    On the 386, immediates, displacements, and data pointers are all in
12653    the same (little-endian) format, so we don't need to care about which
12654    we are handling.  */
12655
12656 void
12657 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
12658 {
12659   char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
12660   valueT value = *valP;
12661
12662 #if !defined (TE_Mach)
12663   if (fixP->fx_pcrel)
12664     {
12665       switch (fixP->fx_r_type)
12666         {
12667         default:
12668           break;
12669
12670         case BFD_RELOC_64:
12671           fixP->fx_r_type = BFD_RELOC_64_PCREL;
12672           break;
12673         case BFD_RELOC_32:
12674         case BFD_RELOC_X86_64_32S:
12675           fixP->fx_r_type = BFD_RELOC_32_PCREL;
12676           break;
12677         case BFD_RELOC_16:
12678           fixP->fx_r_type = BFD_RELOC_16_PCREL;
12679           break;
12680         case BFD_RELOC_8:
12681           fixP->fx_r_type = BFD_RELOC_8_PCREL;
12682           break;
12683         }
12684     }
12685
12686   if (fixP->fx_addsy != NULL
12687       && (fixP->fx_r_type == BFD_RELOC_32_PCREL
12688           || fixP->fx_r_type == BFD_RELOC_64_PCREL
12689           || fixP->fx_r_type == BFD_RELOC_16_PCREL
12690           || fixP->fx_r_type == BFD_RELOC_8_PCREL)
12691       && !use_rela_relocations)
12692     {
12693       /* This is a hack.  There should be a better way to handle this.
12694          This covers for the fact that bfd_install_relocation will
12695          subtract the current location (for partial_inplace, PC relative
12696          relocations); see more below.  */
12697 #ifndef OBJ_AOUT
12698       if (IS_ELF
12699 #ifdef TE_PE
12700           || OUTPUT_FLAVOR == bfd_target_coff_flavour
12701 #endif
12702           )
12703         value += fixP->fx_where + fixP->fx_frag->fr_address;
12704 #endif
12705 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12706       if (IS_ELF)
12707         {
12708           segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
12709
12710           if ((sym_seg == seg
12711                || (symbol_section_p (fixP->fx_addsy)
12712                    && sym_seg != absolute_section))
12713               && !generic_force_reloc (fixP))
12714             {
12715               /* Yes, we add the values in twice.  This is because
12716                  bfd_install_relocation subtracts them out again.  I think
12717                  bfd_install_relocation is broken, but I don't dare change
12718                  it.  FIXME.  */
12719               value += fixP->fx_where + fixP->fx_frag->fr_address;
12720             }
12721         }
12722 #endif
12723 #if defined (OBJ_COFF) && defined (TE_PE)
12724       /* For some reason, the PE format does not store a
12725          section address offset for a PC relative symbol.  */
12726       if (S_GET_SEGMENT (fixP->fx_addsy) != seg
12727           || S_IS_WEAK (fixP->fx_addsy))
12728         value += md_pcrel_from (fixP);
12729 #endif
12730     }
12731 #if defined (OBJ_COFF) && defined (TE_PE)
12732   if (fixP->fx_addsy != NULL
12733       && S_IS_WEAK (fixP->fx_addsy)
12734       /* PR 16858: Do not modify weak function references.  */
12735       && ! fixP->fx_pcrel)
12736     {
12737 #if !defined (TE_PEP)
12738       /* For x86 PE weak function symbols are neither PC-relative
12739          nor do they set S_IS_FUNCTION.  So the only reliable way
12740          to detect them is to check the flags of their containing
12741          section.  */
12742       if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
12743           && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
12744         ;
12745       else
12746 #endif
12747       value -= S_GET_VALUE (fixP->fx_addsy);
12748     }
12749 #endif
12750
12751   /* Fix a few things - the dynamic linker expects certain values here,
12752      and we must not disappoint it.  */
12753 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12754   if (IS_ELF && fixP->fx_addsy)
12755     switch (fixP->fx_r_type)
12756       {
12757       case BFD_RELOC_386_PLT32:
12758       case BFD_RELOC_X86_64_PLT32:
12759         /* Make the jump instruction point to the address of the operand.
12760            At runtime we merely add the offset to the actual PLT entry.
12761            NB: Subtract the offset size only for jump instructions.  */
12762         if (fixP->fx_pcrel)
12763           value = -4;
12764         break;
12765
12766       case BFD_RELOC_386_TLS_GD:
12767       case BFD_RELOC_386_TLS_LDM:
12768       case BFD_RELOC_386_TLS_IE_32:
12769       case BFD_RELOC_386_TLS_IE:
12770       case BFD_RELOC_386_TLS_GOTIE:
12771       case BFD_RELOC_386_TLS_GOTDESC:
12772       case BFD_RELOC_X86_64_TLSGD:
12773       case BFD_RELOC_X86_64_TLSLD:
12774       case BFD_RELOC_X86_64_GOTTPOFF:
12775       case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
12776         value = 0; /* Fully resolved at runtime.  No addend.  */
12777         /* Fallthrough */
12778       case BFD_RELOC_386_TLS_LE:
12779       case BFD_RELOC_386_TLS_LDO_32:
12780       case BFD_RELOC_386_TLS_LE_32:
12781       case BFD_RELOC_X86_64_DTPOFF32:
12782       case BFD_RELOC_X86_64_DTPOFF64:
12783       case BFD_RELOC_X86_64_TPOFF32:
12784       case BFD_RELOC_X86_64_TPOFF64:
12785         S_SET_THREAD_LOCAL (fixP->fx_addsy);
12786         break;
12787
12788       case BFD_RELOC_386_TLS_DESC_CALL:
12789       case BFD_RELOC_X86_64_TLSDESC_CALL:
12790         value = 0; /* Fully resolved at runtime.  No addend.  */
12791         S_SET_THREAD_LOCAL (fixP->fx_addsy);
12792         fixP->fx_done = 0;
12793         return;
12794
12795       case BFD_RELOC_VTABLE_INHERIT:
12796       case BFD_RELOC_VTABLE_ENTRY:
12797         fixP->fx_done = 0;
12798         return;
12799
12800       default:
12801         break;
12802       }
12803 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)  */
12804
12805   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
12806   if (!object_64bit)
12807     value = extend_to_32bit_address (value);
12808
12809   *valP = value;
12810 #endif /* !defined (TE_Mach)  */
12811
12812   /* Are we finished with this relocation now?  */
12813   if (fixP->fx_addsy == NULL)
12814     {
12815       fixP->fx_done = 1;
12816       switch (fixP->fx_r_type)
12817         {
12818         case BFD_RELOC_X86_64_32S:
12819           fixP->fx_signed = 1;
12820           break;
12821
12822         default:
12823           break;
12824         }
12825     }
12826 #if defined (OBJ_COFF) && defined (TE_PE)
12827   else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
12828     {
12829       fixP->fx_done = 0;
12830       /* Remember value for tc_gen_reloc.  */
12831       fixP->fx_addnumber = value;
12832       /* Clear out the frag for now.  */
12833       value = 0;
12834     }
12835 #endif
12836   else if (use_rela_relocations)
12837     {
12838       if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
12839         fixP->fx_no_overflow = 1;
12840       /* Remember value for tc_gen_reloc.  */
12841       fixP->fx_addnumber = value;
12842       value = 0;
12843     }
12844
12845   md_number_to_chars (p, value, fixP->fx_size);
12846 }
12847 \f
12848 const char *
12849 md_atof (int type, char *litP, int *sizeP)
12850 {
12851   /* This outputs the LITTLENUMs in REVERSE order;
12852      in accord with the bigendian 386.  */
12853   return ieee_md_atof (type, litP, sizeP, false);
12854 }
12855 \f
12856 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
12857
12858 static char *
12859 output_invalid (int c)
12860 {
12861   if (ISPRINT (c))
12862     snprintf (output_invalid_buf, sizeof (output_invalid_buf),
12863               "'%c'", c);
12864   else
12865     snprintf (output_invalid_buf, sizeof (output_invalid_buf),
12866               "(0x%x)", (unsigned char) c);
12867   return output_invalid_buf;
12868 }
12869
12870 /* Verify that @r can be used in the current context.  */
12871
12872 static bool check_register (const reg_entry *r)
12873 {
12874   if (allow_pseudo_reg)
12875     return true;
12876
12877   if (operand_type_all_zero (&r->reg_type))
12878     return false;
12879
12880   if ((r->reg_type.bitfield.dword
12881        || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
12882        || r->reg_type.bitfield.class == RegCR
12883        || r->reg_type.bitfield.class == RegDR)
12884       && !cpu_arch_flags.bitfield.cpui386)
12885     return false;
12886
12887   if (r->reg_type.bitfield.class == RegTR
12888       && (flag_code == CODE_64BIT
12889           || !cpu_arch_flags.bitfield.cpui386
12890           || cpu_arch_isa_flags.bitfield.cpui586
12891           || cpu_arch_isa_flags.bitfield.cpui686))
12892     return false;
12893
12894   if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
12895     return false;
12896
12897   if (!cpu_arch_flags.bitfield.cpuavx512f)
12898     {
12899       if (r->reg_type.bitfield.zmmword
12900           || r->reg_type.bitfield.class == RegMask)
12901         return false;
12902
12903       if (!cpu_arch_flags.bitfield.cpuavx)
12904         {
12905           if (r->reg_type.bitfield.ymmword)
12906             return false;
12907
12908           if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
12909             return false;
12910         }
12911     }
12912
12913   if (r->reg_type.bitfield.tmmword
12914       && (!cpu_arch_flags.bitfield.cpuamx_tile
12915           || flag_code != CODE_64BIT))
12916     return false;
12917
12918   if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
12919     return false;
12920
12921   /* Don't allow fake index register unless allow_index_reg isn't 0. */
12922   if (!allow_index_reg && r->reg_num == RegIZ)
12923     return false;
12924
12925   /* Upper 16 vector registers are only available with VREX in 64bit
12926      mode, and require EVEX encoding.  */
12927   if (r->reg_flags & RegVRex)
12928     {
12929       if (!cpu_arch_flags.bitfield.cpuavx512f
12930           || flag_code != CODE_64BIT)
12931         return false;
12932
12933       if (i.vec_encoding == vex_encoding_default)
12934         i.vec_encoding = vex_encoding_evex;
12935       else if (i.vec_encoding != vex_encoding_evex)
12936         i.vec_encoding = vex_encoding_error;
12937     }
12938
12939   if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
12940       && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
12941       && flag_code != CODE_64BIT)
12942     return false;
12943
12944   if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
12945       && !intel_syntax)
12946     return false;
12947
12948   return true;
12949 }
12950
12951 /* REG_STRING starts *before* REGISTER_PREFIX.  */
12952
12953 static const reg_entry *
12954 parse_real_register (char *reg_string, char **end_op)
12955 {
12956   char *s = reg_string;
12957   char *p;
12958   char reg_name_given[MAX_REG_NAME_SIZE + 1];
12959   const reg_entry *r;
12960
12961   /* Skip possible REGISTER_PREFIX and possible whitespace.  */
12962   if (*s == REGISTER_PREFIX)
12963     ++s;
12964
12965   if (is_space_char (*s))
12966     ++s;
12967
12968   p = reg_name_given;
12969   while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
12970     {
12971       if (p >= reg_name_given + MAX_REG_NAME_SIZE)
12972         return (const reg_entry *) NULL;
12973       s++;
12974     }
12975
12976   /* For naked regs, make sure that we are not dealing with an identifier.
12977      This prevents confusing an identifier like `eax_var' with register
12978      `eax'.  */
12979   if (allow_naked_reg && identifier_chars[(unsigned char) *s])
12980     return (const reg_entry *) NULL;
12981
12982   *end_op = s;
12983
12984   r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
12985
12986   /* Handle floating point regs, allowing spaces in the (i) part.  */
12987   if (r == reg_st0)
12988     {
12989       if (!cpu_arch_flags.bitfield.cpu8087
12990           && !cpu_arch_flags.bitfield.cpu287
12991           && !cpu_arch_flags.bitfield.cpu387
12992           && !allow_pseudo_reg)
12993         return (const reg_entry *) NULL;
12994
12995       if (is_space_char (*s))
12996         ++s;
12997       if (*s == '(')
12998         {
12999           ++s;
13000           if (is_space_char (*s))
13001             ++s;
13002           if (*s >= '0' && *s <= '7')
13003             {
13004               int fpr = *s - '0';
13005               ++s;
13006               if (is_space_char (*s))
13007                 ++s;
13008               if (*s == ')')
13009                 {
13010                   *end_op = s + 1;
13011                   know (r[fpr].reg_num == fpr);
13012                   return r + fpr;
13013                 }
13014             }
13015           /* We have "%st(" then garbage.  */
13016           return (const reg_entry *) NULL;
13017         }
13018     }
13019
13020   return r && check_register (r) ? r : NULL;
13021 }
13022
13023 /* REG_STRING starts *before* REGISTER_PREFIX.  */
13024
13025 static const reg_entry *
13026 parse_register (char *reg_string, char **end_op)
13027 {
13028   const reg_entry *r;
13029
13030   if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
13031     r = parse_real_register (reg_string, end_op);
13032   else
13033     r = NULL;
13034   if (!r)
13035     {
13036       char *save = input_line_pointer;
13037       char c;
13038       symbolS *symbolP;
13039
13040       input_line_pointer = reg_string;
13041       c = get_symbol_name (&reg_string);
13042       symbolP = symbol_find (reg_string);
13043       while (symbolP && S_GET_SEGMENT (symbolP) != reg_section)
13044         {
13045           const expressionS *e = symbol_get_value_expression(symbolP);
13046
13047           if (e->X_op != O_symbol || e->X_add_number)
13048             break;
13049           symbolP = e->X_add_symbol;
13050         }
13051       if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
13052         {
13053           const expressionS *e = symbol_get_value_expression (symbolP);
13054
13055           know (e->X_op == O_register);
13056           know (e->X_add_number >= 0
13057                 && (valueT) e->X_add_number < i386_regtab_size);
13058           r = i386_regtab + e->X_add_number;
13059           if (!check_register (r))
13060             {
13061               as_bad (_("register '%s%s' cannot be used here"),
13062                       register_prefix, r->reg_name);
13063               r = &bad_reg;
13064             }
13065           *end_op = input_line_pointer;
13066         }
13067       *input_line_pointer = c;
13068       input_line_pointer = save;
13069     }
13070   return r;
13071 }
13072
13073 int
13074 i386_parse_name (char *name, expressionS *e, char *nextcharP)
13075 {
13076   const reg_entry *r = NULL;
13077   char *end = input_line_pointer;
13078
13079   *end = *nextcharP;
13080   if (*name == REGISTER_PREFIX || allow_naked_reg)
13081     r = parse_real_register (name, &input_line_pointer);
13082   if (r && end <= input_line_pointer)
13083     {
13084       *nextcharP = *input_line_pointer;
13085       *input_line_pointer = 0;
13086       if (r != &bad_reg)
13087         {
13088           e->X_op = O_register;
13089           e->X_add_number = r - i386_regtab;
13090         }
13091       else
13092           e->X_op = O_illegal;
13093       return 1;
13094     }
13095   input_line_pointer = end;
13096   *end = 0;
13097   return intel_syntax ? i386_intel_parse_name (name, e) : 0;
13098 }
13099
13100 void
13101 md_operand (expressionS *e)
13102 {
13103   char *end;
13104   const reg_entry *r;
13105
13106   switch (*input_line_pointer)
13107     {
13108     case REGISTER_PREFIX:
13109       r = parse_real_register (input_line_pointer, &end);
13110       if (r)
13111         {
13112           e->X_op = O_register;
13113           e->X_add_number = r - i386_regtab;
13114           input_line_pointer = end;
13115         }
13116       break;
13117
13118     case '[':
13119       gas_assert (intel_syntax);
13120       end = input_line_pointer++;
13121       expression (e);
13122       if (*input_line_pointer == ']')
13123         {
13124           ++input_line_pointer;
13125           e->X_op_symbol = make_expr_symbol (e);
13126           e->X_add_symbol = NULL;
13127           e->X_add_number = 0;
13128           e->X_op = O_index;
13129         }
13130       else
13131         {
13132           e->X_op = O_absent;
13133           input_line_pointer = end;
13134         }
13135       break;
13136     }
13137 }
13138
13139 \f
13140 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13141 const char *md_shortopts = "kVQ:sqnO::";
13142 #else
13143 const char *md_shortopts = "qnO::";
13144 #endif
13145
13146 #define OPTION_32 (OPTION_MD_BASE + 0)
13147 #define OPTION_64 (OPTION_MD_BASE + 1)
13148 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
13149 #define OPTION_MARCH (OPTION_MD_BASE + 3)
13150 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
13151 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
13152 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
13153 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
13154 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
13155 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
13156 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
13157 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
13158 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
13159 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
13160 #define OPTION_X32 (OPTION_MD_BASE + 14)
13161 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
13162 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
13163 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
13164 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
13165 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
13166 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
13167 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
13168 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
13169 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
13170 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
13171 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
13172 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
13173 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
13174 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
13175 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
13176 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
13177 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
13178 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
13179 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
13180 #define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
13181
13182 struct option md_longopts[] =
13183 {
13184   {"32", no_argument, NULL, OPTION_32},
13185 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13186      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13187   {"64", no_argument, NULL, OPTION_64},
13188 #endif
13189 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13190   {"x32", no_argument, NULL, OPTION_X32},
13191   {"mshared", no_argument, NULL, OPTION_MSHARED},
13192   {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
13193 #endif
13194   {"divide", no_argument, NULL, OPTION_DIVIDE},
13195   {"march", required_argument, NULL, OPTION_MARCH},
13196   {"mtune", required_argument, NULL, OPTION_MTUNE},
13197   {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
13198   {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
13199   {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
13200   {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
13201   {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
13202   {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
13203   {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
13204   {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
13205   {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
13206   {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
13207   {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
13208   {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
13209   {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
13210 # if defined (TE_PE) || defined (TE_PEP)
13211   {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
13212 #endif
13213   {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
13214   {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
13215   {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
13216   {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
13217   {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
13218   {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
13219   {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
13220   {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
13221   {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
13222   {"mlfence-before-indirect-branch", required_argument, NULL,
13223    OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
13224   {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
13225   {"mamd64", no_argument, NULL, OPTION_MAMD64},
13226   {"mintel64", no_argument, NULL, OPTION_MINTEL64},
13227   {NULL, no_argument, NULL, 0}
13228 };
13229 size_t md_longopts_size = sizeof (md_longopts);
13230
13231 int
13232 md_parse_option (int c, const char *arg)
13233 {
13234   unsigned int j;
13235   char *arch, *next, *saved, *type;
13236
13237   switch (c)
13238     {
13239     case 'n':
13240       optimize_align_code = 0;
13241       break;
13242
13243     case 'q':
13244       quiet_warnings = 1;
13245       break;
13246
13247 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13248       /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
13249          should be emitted or not.  FIXME: Not implemented.  */
13250     case 'Q':
13251       if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
13252         return 0;
13253       break;
13254
13255       /* -V: SVR4 argument to print version ID.  */
13256     case 'V':
13257       print_version_id ();
13258       break;
13259
13260       /* -k: Ignore for FreeBSD compatibility.  */
13261     case 'k':
13262       break;
13263
13264     case 's':
13265       /* -s: On i386 Solaris, this tells the native assembler to use
13266          .stab instead of .stab.excl.  We always use .stab anyhow.  */
13267       break;
13268
13269     case OPTION_MSHARED:
13270       shared = 1;
13271       break;
13272
13273     case OPTION_X86_USED_NOTE:
13274       if (strcasecmp (arg, "yes") == 0)
13275         x86_used_note = 1;
13276       else if (strcasecmp (arg, "no") == 0)
13277         x86_used_note = 0;
13278       else
13279         as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
13280       break;
13281
13282
13283 #endif
13284 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13285      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13286     case OPTION_64:
13287       {
13288         const char **list, **l;
13289
13290         list = bfd_target_list ();
13291         for (l = list; *l != NULL; l++)
13292           if (startswith (*l, "elf64-x86-64")
13293               || strcmp (*l, "coff-x86-64") == 0
13294               || strcmp (*l, "pe-x86-64") == 0
13295               || strcmp (*l, "pei-x86-64") == 0
13296               || strcmp (*l, "mach-o-x86-64") == 0)
13297             {
13298               default_arch = "x86_64";
13299               break;
13300             }
13301         if (*l == NULL)
13302           as_fatal (_("no compiled in support for x86_64"));
13303         free (list);
13304       }
13305       break;
13306 #endif
13307
13308 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13309     case OPTION_X32:
13310       if (IS_ELF)
13311         {
13312           const char **list, **l;
13313
13314           list = bfd_target_list ();
13315           for (l = list; *l != NULL; l++)
13316             if (startswith (*l, "elf32-x86-64"))
13317               {
13318                 default_arch = "x86_64:32";
13319                 break;
13320               }
13321           if (*l == NULL)
13322             as_fatal (_("no compiled in support for 32bit x86_64"));
13323           free (list);
13324         }
13325       else
13326         as_fatal (_("32bit x86_64 is only supported for ELF"));
13327       break;
13328 #endif
13329
13330     case OPTION_32:
13331       default_arch = "i386";
13332       break;
13333
13334     case OPTION_DIVIDE:
13335 #ifdef SVR4_COMMENT_CHARS
13336       {
13337         char *n, *t;
13338         const char *s;
13339
13340         n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
13341         t = n;
13342         for (s = i386_comment_chars; *s != '\0'; s++)
13343           if (*s != '/')
13344             *t++ = *s;
13345         *t = '\0';
13346         i386_comment_chars = n;
13347       }
13348 #endif
13349       break;
13350
13351     case OPTION_MARCH:
13352       saved = xstrdup (arg);
13353       arch = saved;
13354       /* Allow -march=+nosse.  */
13355       if (*arch == '+')
13356         arch++;
13357       do
13358         {
13359           if (*arch == '.')
13360             as_fatal (_("invalid -march= option: `%s'"), arg);
13361           next = strchr (arch, '+');
13362           if (next)
13363             *next++ = '\0';
13364           for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13365             {
13366               if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
13367                   && strcmp (arch, cpu_arch[j].name) == 0)
13368                 {
13369                   /* Processor.  */
13370                   if (! cpu_arch[j].enable.bitfield.cpui386)
13371                     continue;
13372
13373                   cpu_arch_name = cpu_arch[j].name;
13374                   free (cpu_sub_arch_name);
13375                   cpu_sub_arch_name = NULL;
13376                   cpu_arch_flags = cpu_arch[j].enable;
13377                   cpu_arch_isa = cpu_arch[j].type;
13378                   cpu_arch_isa_flags = cpu_arch[j].enable;
13379                   if (!cpu_arch_tune_set)
13380                     {
13381                       cpu_arch_tune = cpu_arch_isa;
13382                       cpu_arch_tune_flags = cpu_arch_isa_flags;
13383                     }
13384                   break;
13385                 }
13386               else if (cpu_arch[j].type == PROCESSOR_NONE
13387                        && strcmp (arch, cpu_arch[j].name) == 0
13388                        && !cpu_flags_all_zero (&cpu_arch[j].enable))
13389                 {
13390                   /* ISA extension.  */
13391                   i386_cpu_flags flags;
13392
13393                   flags = cpu_flags_or (cpu_arch_flags,
13394                                         cpu_arch[j].enable);
13395
13396                   if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13397                     {
13398                       extend_cpu_sub_arch_name (arch);
13399                       cpu_arch_flags = flags;
13400                       cpu_arch_isa_flags = flags;
13401                     }
13402                   else
13403                     cpu_arch_isa_flags
13404                       = cpu_flags_or (cpu_arch_isa_flags,
13405                                       cpu_arch[j].enable);
13406                   break;
13407                 }
13408             }
13409
13410           if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
13411             {
13412               /* Disable an ISA extension.  */
13413               for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13414                 if (cpu_arch[j].type == PROCESSOR_NONE
13415                     && strcmp (arch + 2, cpu_arch[j].name) == 0)
13416                   {
13417                     i386_cpu_flags flags;
13418
13419                     flags = cpu_flags_and_not (cpu_arch_flags,
13420                                                cpu_arch[j].disable);
13421                     if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13422                       {
13423                         extend_cpu_sub_arch_name (arch);
13424                         cpu_arch_flags = flags;
13425                         cpu_arch_isa_flags = flags;
13426                       }
13427                     break;
13428                   }
13429             }
13430
13431           if (j >= ARRAY_SIZE (cpu_arch))
13432             as_fatal (_("invalid -march= option: `%s'"), arg);
13433
13434           arch = next;
13435         }
13436       while (next != NULL);
13437       free (saved);
13438       break;
13439
13440     case OPTION_MTUNE:
13441       if (*arg == '.')
13442         as_fatal (_("invalid -mtune= option: `%s'"), arg);
13443       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13444         {
13445           if (cpu_arch[j].type != PROCESSOR_NONE
13446               && strcmp (arg, cpu_arch[j].name) == 0)
13447             {
13448               cpu_arch_tune_set = 1;
13449               cpu_arch_tune = cpu_arch [j].type;
13450               cpu_arch_tune_flags = cpu_arch[j].enable;
13451               break;
13452             }
13453         }
13454       if (j >= ARRAY_SIZE (cpu_arch))
13455         as_fatal (_("invalid -mtune= option: `%s'"), arg);
13456       break;
13457
13458     case OPTION_MMNEMONIC:
13459       if (strcasecmp (arg, "att") == 0)
13460         intel_mnemonic = 0;
13461       else if (strcasecmp (arg, "intel") == 0)
13462         intel_mnemonic = 1;
13463       else
13464         as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
13465       break;
13466
13467     case OPTION_MSYNTAX:
13468       if (strcasecmp (arg, "att") == 0)
13469         intel_syntax = 0;
13470       else if (strcasecmp (arg, "intel") == 0)
13471         intel_syntax = 1;
13472       else
13473         as_fatal (_("invalid -msyntax= option: `%s'"), arg);
13474       break;
13475
13476     case OPTION_MINDEX_REG:
13477       allow_index_reg = 1;
13478       break;
13479
13480     case OPTION_MNAKED_REG:
13481       allow_naked_reg = 1;
13482       break;
13483
13484     case OPTION_MSSE2AVX:
13485       sse2avx = 1;
13486       break;
13487
13488     case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
13489       use_unaligned_vector_move = 1;
13490       break;
13491
13492     case OPTION_MSSE_CHECK:
13493       if (strcasecmp (arg, "error") == 0)
13494         sse_check = check_error;
13495       else if (strcasecmp (arg, "warning") == 0)
13496         sse_check = check_warning;
13497       else if (strcasecmp (arg, "none") == 0)
13498         sse_check = check_none;
13499       else
13500         as_fatal (_("invalid -msse-check= option: `%s'"), arg);
13501       break;
13502
13503     case OPTION_MOPERAND_CHECK:
13504       if (strcasecmp (arg, "error") == 0)
13505         operand_check = check_error;
13506       else if (strcasecmp (arg, "warning") == 0)
13507         operand_check = check_warning;
13508       else if (strcasecmp (arg, "none") == 0)
13509         operand_check = check_none;
13510       else
13511         as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
13512       break;
13513
13514     case OPTION_MAVXSCALAR:
13515       if (strcasecmp (arg, "128") == 0)
13516         avxscalar = vex128;
13517       else if (strcasecmp (arg, "256") == 0)
13518         avxscalar = vex256;
13519       else
13520         as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
13521       break;
13522
13523     case OPTION_MVEXWIG:
13524       if (strcmp (arg, "0") == 0)
13525         vexwig = vexw0;
13526       else if (strcmp (arg, "1") == 0)
13527         vexwig = vexw1;
13528       else
13529         as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
13530       break;
13531
13532     case OPTION_MADD_BND_PREFIX:
13533       add_bnd_prefix = 1;
13534       break;
13535
13536     case OPTION_MEVEXLIG:
13537       if (strcmp (arg, "128") == 0)
13538         evexlig = evexl128;
13539       else if (strcmp (arg, "256") == 0)
13540         evexlig = evexl256;
13541       else  if (strcmp (arg, "512") == 0)
13542         evexlig = evexl512;
13543       else
13544         as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
13545       break;
13546
13547     case OPTION_MEVEXRCIG:
13548       if (strcmp (arg, "rne") == 0)
13549         evexrcig = rne;
13550       else if (strcmp (arg, "rd") == 0)
13551         evexrcig = rd;
13552       else if (strcmp (arg, "ru") == 0)
13553         evexrcig = ru;
13554       else if (strcmp (arg, "rz") == 0)
13555         evexrcig = rz;
13556       else
13557         as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
13558       break;
13559
13560     case OPTION_MEVEXWIG:
13561       if (strcmp (arg, "0") == 0)
13562         evexwig = evexw0;
13563       else if (strcmp (arg, "1") == 0)
13564         evexwig = evexw1;
13565       else
13566         as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
13567       break;
13568
13569 # if defined (TE_PE) || defined (TE_PEP)
13570     case OPTION_MBIG_OBJ:
13571       use_big_obj = 1;
13572       break;
13573 #endif
13574
13575     case OPTION_MOMIT_LOCK_PREFIX:
13576       if (strcasecmp (arg, "yes") == 0)
13577         omit_lock_prefix = 1;
13578       else if (strcasecmp (arg, "no") == 0)
13579         omit_lock_prefix = 0;
13580       else
13581         as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
13582       break;
13583
13584     case OPTION_MFENCE_AS_LOCK_ADD:
13585       if (strcasecmp (arg, "yes") == 0)
13586         avoid_fence = 1;
13587       else if (strcasecmp (arg, "no") == 0)
13588         avoid_fence = 0;
13589       else
13590         as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
13591       break;
13592
13593     case OPTION_MLFENCE_AFTER_LOAD:
13594       if (strcasecmp (arg, "yes") == 0)
13595         lfence_after_load = 1;
13596       else if (strcasecmp (arg, "no") == 0)
13597         lfence_after_load = 0;
13598       else
13599         as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
13600       break;
13601
13602     case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
13603       if (strcasecmp (arg, "all") == 0)
13604         {
13605           lfence_before_indirect_branch = lfence_branch_all;
13606           if (lfence_before_ret == lfence_before_ret_none)
13607             lfence_before_ret = lfence_before_ret_shl;
13608         }
13609       else if (strcasecmp (arg, "memory") == 0)
13610         lfence_before_indirect_branch = lfence_branch_memory;
13611       else if (strcasecmp (arg, "register") == 0)
13612         lfence_before_indirect_branch = lfence_branch_register;
13613       else if (strcasecmp (arg, "none") == 0)
13614         lfence_before_indirect_branch = lfence_branch_none;
13615       else
13616         as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
13617                   arg);
13618       break;
13619
13620     case OPTION_MLFENCE_BEFORE_RET:
13621       if (strcasecmp (arg, "or") == 0)
13622         lfence_before_ret = lfence_before_ret_or;
13623       else if (strcasecmp (arg, "not") == 0)
13624         lfence_before_ret = lfence_before_ret_not;
13625       else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
13626         lfence_before_ret = lfence_before_ret_shl;
13627       else if (strcasecmp (arg, "none") == 0)
13628         lfence_before_ret = lfence_before_ret_none;
13629       else
13630         as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
13631                   arg);
13632       break;
13633
13634     case OPTION_MRELAX_RELOCATIONS:
13635       if (strcasecmp (arg, "yes") == 0)
13636         generate_relax_relocations = 1;
13637       else if (strcasecmp (arg, "no") == 0)
13638         generate_relax_relocations = 0;
13639       else
13640         as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
13641       break;
13642
13643     case OPTION_MALIGN_BRANCH_BOUNDARY:
13644       {
13645         char *end;
13646         long int align = strtoul (arg, &end, 0);
13647         if (*end == '\0')
13648           {
13649             if (align == 0)
13650               {
13651                 align_branch_power = 0;
13652                 break;
13653               }
13654             else if (align >= 16)
13655               {
13656                 int align_power;
13657                 for (align_power = 0;
13658                      (align & 1) == 0;
13659                      align >>= 1, align_power++)
13660                   continue;
13661                 /* Limit alignment power to 31.  */
13662                 if (align == 1 && align_power < 32)
13663                   {
13664                     align_branch_power = align_power;
13665                     break;
13666                   }
13667               }
13668           }
13669         as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
13670       }
13671       break;
13672
13673     case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
13674       {
13675         char *end;
13676         int align = strtoul (arg, &end, 0);
13677         /* Some processors only support 5 prefixes.  */
13678         if (*end == '\0' && align >= 0 && align < 6)
13679           {
13680             align_branch_prefix_size = align;
13681             break;
13682           }
13683         as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
13684                   arg);
13685       }
13686       break;
13687
13688     case OPTION_MALIGN_BRANCH:
13689       align_branch = 0;
13690       saved = xstrdup (arg);
13691       type = saved;
13692       do
13693         {
13694           next = strchr (type, '+');
13695           if (next)
13696             *next++ = '\0';
13697           if (strcasecmp (type, "jcc") == 0)
13698             align_branch |= align_branch_jcc_bit;
13699           else if (strcasecmp (type, "fused") == 0)
13700             align_branch |= align_branch_fused_bit;
13701           else if (strcasecmp (type, "jmp") == 0)
13702             align_branch |= align_branch_jmp_bit;
13703           else if (strcasecmp (type, "call") == 0)
13704             align_branch |= align_branch_call_bit;
13705           else if (strcasecmp (type, "ret") == 0)
13706             align_branch |= align_branch_ret_bit;
13707           else if (strcasecmp (type, "indirect") == 0)
13708             align_branch |= align_branch_indirect_bit;
13709           else
13710             as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
13711           type = next;
13712         }
13713       while (next != NULL);
13714       free (saved);
13715       break;
13716
13717     case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
13718       align_branch_power = 5;
13719       align_branch_prefix_size = 5;
13720       align_branch = (align_branch_jcc_bit
13721                       | align_branch_fused_bit
13722                       | align_branch_jmp_bit);
13723       break;
13724
13725     case OPTION_MAMD64:
13726       isa64 = amd64;
13727       break;
13728
13729     case OPTION_MINTEL64:
13730       isa64 = intel64;
13731       break;
13732
13733     case 'O':
13734       if (arg == NULL)
13735         {
13736           optimize = 1;
13737           /* Turn off -Os.  */
13738           optimize_for_space = 0;
13739         }
13740       else if (*arg == 's')
13741         {
13742           optimize_for_space = 1;
13743           /* Turn on all encoding optimizations.  */
13744           optimize = INT_MAX;
13745         }
13746       else
13747         {
13748           optimize = atoi (arg);
13749           /* Turn off -Os.  */
13750           optimize_for_space = 0;
13751         }
13752       break;
13753
13754     default:
13755       return 0;
13756     }
13757   return 1;
13758 }
13759
13760 #define MESSAGE_TEMPLATE \
13761 "                                                                                "
13762
13763 static char *
13764 output_message (FILE *stream, char *p, char *message, char *start,
13765                 int *left_p, const char *name, int len)
13766 {
13767   int size = sizeof (MESSAGE_TEMPLATE);
13768   int left = *left_p;
13769
13770   /* Reserve 2 spaces for ", " or ",\0" */
13771   left -= len + 2;
13772
13773   /* Check if there is any room.  */
13774   if (left >= 0)
13775     {
13776       if (p != start)
13777         {
13778           *p++ = ',';
13779           *p++ = ' ';
13780         }
13781       p = mempcpy (p, name, len);
13782     }
13783   else
13784     {
13785       /* Output the current message now and start a new one.  */
13786       *p++ = ',';
13787       *p = '\0';
13788       fprintf (stream, "%s\n", message);
13789       p = start;
13790       left = size - (start - message) - len - 2;
13791
13792       gas_assert (left >= 0);
13793
13794       p = mempcpy (p, name, len);
13795     }
13796
13797   *left_p = left;
13798   return p;
13799 }
13800
13801 static void
13802 show_arch (FILE *stream, int ext, int check)
13803 {
13804   static char message[] = MESSAGE_TEMPLATE;
13805   char *start = message + 27;
13806   char *p;
13807   int size = sizeof (MESSAGE_TEMPLATE);
13808   int left;
13809   const char *name;
13810   int len;
13811   unsigned int j;
13812
13813   p = start;
13814   left = size - (start - message);
13815
13816   if (!ext && check)
13817     {
13818       p = output_message (stream, p, message, start, &left,
13819                           STRING_COMMA_LEN ("default"));
13820       p = output_message (stream, p, message, start, &left,
13821                           STRING_COMMA_LEN ("push"));
13822       p = output_message (stream, p, message, start, &left,
13823                           STRING_COMMA_LEN ("pop"));
13824     }
13825
13826   for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13827     {
13828       /* Should it be skipped?  */
13829       if (cpu_arch [j].skip)
13830         continue;
13831
13832       name = cpu_arch [j].name;
13833       len = cpu_arch [j].len;
13834       if (cpu_arch[j].type == PROCESSOR_NONE)
13835         {
13836           /* It is an extension.  Skip if we aren't asked to show it.  */
13837           if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
13838             continue;
13839         }
13840       else if (ext)
13841         {
13842           /* It is an processor.  Skip if we show only extension.  */
13843           continue;
13844         }
13845       else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
13846         {
13847           /* It is an impossible processor - skip.  */
13848           continue;
13849         }
13850
13851       p = output_message (stream, p, message, start, &left, name, len);
13852     }
13853
13854   /* Display disabled extensions.  */
13855   if (ext)
13856     for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13857       {
13858         char *str;
13859
13860         if (cpu_arch[j].type != PROCESSOR_NONE
13861             || !cpu_flags_all_zero (&cpu_arch[j].enable))
13862           continue;
13863         str = xasprintf ("no%s", cpu_arch[j].name);
13864         p = output_message (stream, p, message, start, &left, str,
13865                             strlen (str));
13866         free (str);
13867       }
13868
13869   *p = '\0';
13870   fprintf (stream, "%s\n", message);
13871 }
13872
13873 void
13874 md_show_usage (FILE *stream)
13875 {
13876 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13877   fprintf (stream, _("\
13878   -Qy, -Qn                ignored\n\
13879   -V                      print assembler version number\n\
13880   -k                      ignored\n"));
13881 #endif
13882   fprintf (stream, _("\
13883   -n                      do not optimize code alignment\n\
13884   -O{012s}                attempt some code optimizations\n\
13885   -q                      quieten some warnings\n"));
13886 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13887   fprintf (stream, _("\
13888   -s                      ignored\n"));
13889 #endif
13890 #ifdef BFD64
13891 # if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13892   fprintf (stream, _("\
13893   --32/--64/--x32         generate 32bit/64bit/x32 object\n"));
13894 # elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
13895   fprintf (stream, _("\
13896   --32/--64               generate 32bit/64bit object\n"));
13897 # endif
13898 #endif
13899 #ifdef SVR4_COMMENT_CHARS
13900   fprintf (stream, _("\
13901   --divide                do not treat `/' as a comment character\n"));
13902 #else
13903   fprintf (stream, _("\
13904   --divide                ignored\n"));
13905 #endif
13906   fprintf (stream, _("\
13907   -march=CPU[,+EXTENSION...]\n\
13908                           generate code for CPU and EXTENSION, CPU is one of:\n"));
13909   show_arch (stream, 0, 1);
13910   fprintf (stream, _("\
13911                           EXTENSION is combination of (possibly \"no\"-prefixed):\n"));
13912   show_arch (stream, 1, 0);
13913   fprintf (stream, _("\
13914   -mtune=CPU              optimize for CPU, CPU is one of:\n"));
13915   show_arch (stream, 0, 0);
13916   fprintf (stream, _("\
13917   -msse2avx               encode SSE instructions with VEX prefix\n"));
13918   fprintf (stream, _("\
13919   -muse-unaligned-vector-move\n\
13920                           encode aligned vector move as unaligned vector move\n"));
13921   fprintf (stream, _("\
13922   -msse-check=[none|error|warning] (default: warning)\n\
13923                           check SSE instructions\n"));
13924   fprintf (stream, _("\
13925   -moperand-check=[none|error|warning] (default: warning)\n\
13926                           check operand combinations for validity\n"));
13927   fprintf (stream, _("\
13928   -mavxscalar=[128|256] (default: 128)\n\
13929                           encode scalar AVX instructions with specific vector\n\
13930                            length\n"));
13931   fprintf (stream, _("\
13932   -mvexwig=[0|1] (default: 0)\n\
13933                           encode VEX instructions with specific VEX.W value\n\
13934                            for VEX.W bit ignored instructions\n"));
13935   fprintf (stream, _("\
13936   -mevexlig=[128|256|512] (default: 128)\n\
13937                           encode scalar EVEX instructions with specific vector\n\
13938                            length\n"));
13939   fprintf (stream, _("\
13940   -mevexwig=[0|1] (default: 0)\n\
13941                           encode EVEX instructions with specific EVEX.W value\n\
13942                            for EVEX.W bit ignored instructions\n"));
13943   fprintf (stream, _("\
13944   -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
13945                           encode EVEX instructions with specific EVEX.RC value\n\
13946                            for SAE-only ignored instructions\n"));
13947   fprintf (stream, _("\
13948   -mmnemonic=[att|intel] "));
13949   if (SYSV386_COMPAT)
13950     fprintf (stream, _("(default: att)\n"));
13951   else
13952     fprintf (stream, _("(default: intel)\n"));
13953   fprintf (stream, _("\
13954                           use AT&T/Intel mnemonic\n"));
13955   fprintf (stream, _("\
13956   -msyntax=[att|intel] (default: att)\n\
13957                           use AT&T/Intel syntax\n"));
13958   fprintf (stream, _("\
13959   -mindex-reg             support pseudo index registers\n"));
13960   fprintf (stream, _("\
13961   -mnaked-reg             don't require `%%' prefix for registers\n"));
13962   fprintf (stream, _("\
13963   -madd-bnd-prefix        add BND prefix for all valid branches\n"));
13964 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13965   fprintf (stream, _("\
13966   -mshared                disable branch optimization for shared code\n"));
13967   fprintf (stream, _("\
13968   -mx86-used-note=[no|yes] "));
13969   if (DEFAULT_X86_USED_NOTE)
13970     fprintf (stream, _("(default: yes)\n"));
13971   else
13972     fprintf (stream, _("(default: no)\n"));
13973   fprintf (stream, _("\
13974                           generate x86 used ISA and feature properties\n"));
13975 #endif
13976 #if defined (TE_PE) || defined (TE_PEP)
13977   fprintf (stream, _("\
13978   -mbig-obj               generate big object files\n"));
13979 #endif
13980   fprintf (stream, _("\
13981   -momit-lock-prefix=[no|yes] (default: no)\n\
13982                           strip all lock prefixes\n"));
13983   fprintf (stream, _("\
13984   -mfence-as-lock-add=[no|yes] (default: no)\n\
13985                           encode lfence, mfence and sfence as\n\
13986                            lock addl $0x0, (%%{re}sp)\n"));
13987   fprintf (stream, _("\
13988   -mrelax-relocations=[no|yes] "));
13989   if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
13990     fprintf (stream, _("(default: yes)\n"));
13991   else
13992     fprintf (stream, _("(default: no)\n"));
13993   fprintf (stream, _("\
13994                           generate relax relocations\n"));
13995   fprintf (stream, _("\
13996   -malign-branch-boundary=NUM (default: 0)\n\
13997                           align branches within NUM byte boundary\n"));
13998   fprintf (stream, _("\
13999   -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
14000                           TYPE is combination of jcc, fused, jmp, call, ret,\n\
14001                            indirect\n\
14002                           specify types of branches to align\n"));
14003   fprintf (stream, _("\
14004   -malign-branch-prefix-size=NUM (default: 5)\n\
14005                           align branches with NUM prefixes per instruction\n"));
14006   fprintf (stream, _("\
14007   -mbranches-within-32B-boundaries\n\
14008                           align branches within 32 byte boundary\n"));
14009   fprintf (stream, _("\
14010   -mlfence-after-load=[no|yes] (default: no)\n\
14011                           generate lfence after load\n"));
14012   fprintf (stream, _("\
14013   -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
14014                           generate lfence before indirect near branch\n"));
14015   fprintf (stream, _("\
14016   -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
14017                           generate lfence before ret\n"));
14018   fprintf (stream, _("\
14019   -mamd64                 accept only AMD64 ISA [default]\n"));
14020   fprintf (stream, _("\
14021   -mintel64               accept only Intel64 ISA\n"));
14022 }
14023
14024 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
14025      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
14026      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
14027
14028 /* Pick the target format to use.  */
14029
14030 const char *
14031 i386_target_format (void)
14032 {
14033   if (startswith (default_arch, "x86_64"))
14034     {
14035       update_code_flag (CODE_64BIT, 1);
14036       if (default_arch[6] == '\0')
14037         x86_elf_abi = X86_64_ABI;
14038       else
14039         x86_elf_abi = X86_64_X32_ABI;
14040     }
14041   else if (!strcmp (default_arch, "i386"))
14042     update_code_flag (CODE_32BIT, 1);
14043   else if (!strcmp (default_arch, "iamcu"))
14044     {
14045       update_code_flag (CODE_32BIT, 1);
14046       if (cpu_arch_isa == PROCESSOR_UNKNOWN)
14047         {
14048           static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
14049           cpu_arch_name = "iamcu";
14050           free (cpu_sub_arch_name);
14051           cpu_sub_arch_name = NULL;
14052           cpu_arch_flags = iamcu_flags;
14053           cpu_arch_isa = PROCESSOR_IAMCU;
14054           cpu_arch_isa_flags = iamcu_flags;
14055           if (!cpu_arch_tune_set)
14056             {
14057               cpu_arch_tune = cpu_arch_isa;
14058               cpu_arch_tune_flags = cpu_arch_isa_flags;
14059             }
14060         }
14061       else if (cpu_arch_isa != PROCESSOR_IAMCU)
14062         as_fatal (_("Intel MCU doesn't support `%s' architecture"),
14063                   cpu_arch_name);
14064     }
14065   else
14066     as_fatal (_("unknown architecture"));
14067
14068   if (cpu_flags_all_zero (&cpu_arch_isa_flags))
14069     cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
14070   if (cpu_flags_all_zero (&cpu_arch_tune_flags))
14071     cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].enable;
14072
14073   switch (OUTPUT_FLAVOR)
14074     {
14075 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
14076     case bfd_target_aout_flavour:
14077       return AOUT_TARGET_FORMAT;
14078 #endif
14079 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
14080 # if defined (TE_PE) || defined (TE_PEP)
14081     case bfd_target_coff_flavour:
14082       if (flag_code == CODE_64BIT)
14083         {
14084           object_64bit = 1;
14085           return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
14086         }
14087       return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
14088 # elif defined (TE_GO32)
14089     case bfd_target_coff_flavour:
14090       return "coff-go32";
14091 # else
14092     case bfd_target_coff_flavour:
14093       return "coff-i386";
14094 # endif
14095 #endif
14096 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14097     case bfd_target_elf_flavour:
14098       {
14099         const char *format;
14100
14101         switch (x86_elf_abi)
14102           {
14103           default:
14104             format = ELF_TARGET_FORMAT;
14105 #ifndef TE_SOLARIS
14106             tls_get_addr = "___tls_get_addr";
14107 #endif
14108             break;
14109           case X86_64_ABI:
14110             use_rela_relocations = 1;
14111             object_64bit = 1;
14112 #ifndef TE_SOLARIS
14113             tls_get_addr = "__tls_get_addr";
14114 #endif
14115             format = ELF_TARGET_FORMAT64;
14116             break;
14117           case X86_64_X32_ABI:
14118             use_rela_relocations = 1;
14119             object_64bit = 1;
14120 #ifndef TE_SOLARIS
14121             tls_get_addr = "__tls_get_addr";
14122 #endif
14123             disallow_64bit_reloc = 1;
14124             format = ELF_TARGET_FORMAT32;
14125             break;
14126           }
14127         if (cpu_arch_isa == PROCESSOR_IAMCU)
14128           {
14129             if (x86_elf_abi != I386_ABI)
14130               as_fatal (_("Intel MCU is 32bit only"));
14131             return ELF_TARGET_IAMCU_FORMAT;
14132           }
14133         else
14134           return format;
14135       }
14136 #endif
14137 #if defined (OBJ_MACH_O)
14138     case bfd_target_mach_o_flavour:
14139       if (flag_code == CODE_64BIT)
14140         {
14141           use_rela_relocations = 1;
14142           object_64bit = 1;
14143           return "mach-o-x86-64";
14144         }
14145       else
14146         return "mach-o-i386";
14147 #endif
14148     default:
14149       abort ();
14150       return NULL;
14151     }
14152 }
14153
14154 #endif /* OBJ_MAYBE_ more than one  */
14155 \f
14156 symbolS *
14157 md_undefined_symbol (char *name)
14158 {
14159   if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
14160       && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
14161       && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
14162       && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
14163     {
14164       if (!GOT_symbol)
14165         {
14166           if (symbol_find (name))
14167             as_bad (_("GOT already in symbol table"));
14168           GOT_symbol = symbol_new (name, undefined_section,
14169                                    &zero_address_frag, 0);
14170         };
14171       return GOT_symbol;
14172     }
14173   return 0;
14174 }
14175
14176 /* Round up a section size to the appropriate boundary.  */
14177
14178 valueT
14179 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
14180 {
14181 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
14182   if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
14183     {
14184       /* For a.out, force the section size to be aligned.  If we don't do
14185          this, BFD will align it for us, but it will not write out the
14186          final bytes of the section.  This may be a bug in BFD, but it is
14187          easier to fix it here since that is how the other a.out targets
14188          work.  */
14189       int align;
14190
14191       align = bfd_section_alignment (segment);
14192       size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
14193     }
14194 #endif
14195
14196   return size;
14197 }
14198
14199 /* On the i386, PC-relative offsets are relative to the start of the
14200    next instruction.  That is, the address of the offset, plus its
14201    size, since the offset is always the last part of the insn.  */
14202
14203 long
14204 md_pcrel_from (fixS *fixP)
14205 {
14206   return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
14207 }
14208
14209 #ifndef I386COFF
14210
14211 static void
14212 s_bss (int ignore ATTRIBUTE_UNUSED)
14213 {
14214   int temp;
14215
14216 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14217   if (IS_ELF)
14218     obj_elf_section_change_hook ();
14219 #endif
14220   temp = get_absolute_expression ();
14221   subseg_set (bss_section, (subsegT) temp);
14222   demand_empty_rest_of_line ();
14223 }
14224
14225 #endif
14226
14227 /* Remember constant directive.  */
14228
14229 void
14230 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
14231 {
14232   if (last_insn.kind != last_insn_directive
14233       && (bfd_section_flags (now_seg) & SEC_CODE))
14234     {
14235       last_insn.seg = now_seg;
14236       last_insn.kind = last_insn_directive;
14237       last_insn.name = "constant directive";
14238       last_insn.file = as_where (&last_insn.line);
14239       if (lfence_before_ret != lfence_before_ret_none)
14240         {
14241           if (lfence_before_indirect_branch != lfence_branch_none)
14242             as_warn (_("constant directive skips -mlfence-before-ret "
14243                        "and -mlfence-before-indirect-branch"));
14244           else
14245             as_warn (_("constant directive skips -mlfence-before-ret"));
14246         }
14247       else if (lfence_before_indirect_branch != lfence_branch_none)
14248         as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
14249     }
14250 }
14251
14252 int
14253 i386_validate_fix (fixS *fixp)
14254 {
14255   if (fixp->fx_addsy && S_GET_SEGMENT(fixp->fx_addsy) == reg_section)
14256     {
14257       reloc_howto_type *howto;
14258
14259       howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
14260       as_bad_where (fixp->fx_file, fixp->fx_line,
14261                     _("invalid %s relocation against register"),
14262                     howto ? howto->name : "<unknown>");
14263       return 0;
14264     }
14265
14266 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14267   if (fixp->fx_r_type == BFD_RELOC_SIZE32
14268       || fixp->fx_r_type == BFD_RELOC_SIZE64)
14269     return IS_ELF && fixp->fx_addsy
14270            && (!S_IS_DEFINED (fixp->fx_addsy)
14271                || S_IS_EXTERNAL (fixp->fx_addsy));
14272 #endif
14273
14274   if (fixp->fx_subsy)
14275     {
14276       if (fixp->fx_subsy == GOT_symbol)
14277         {
14278           if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
14279             {
14280               if (!object_64bit)
14281                 abort ();
14282 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14283               if (fixp->fx_tcbit2)
14284                 fixp->fx_r_type = (fixp->fx_tcbit
14285                                    ? BFD_RELOC_X86_64_REX_GOTPCRELX
14286                                    : BFD_RELOC_X86_64_GOTPCRELX);
14287               else
14288 #endif
14289                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
14290             }
14291           else
14292             {
14293               if (!object_64bit)
14294                 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
14295               else
14296                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
14297             }
14298           fixp->fx_subsy = 0;
14299         }
14300     }
14301 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14302   else
14303     {
14304       /* NB: Commit 292676c1 resolved PLT32 reloc aganst local symbol
14305          to section.  Since PLT32 relocation must be against symbols,
14306          turn such PLT32 relocation into PC32 relocation.  */
14307       if (fixp->fx_addsy
14308           && (fixp->fx_r_type == BFD_RELOC_386_PLT32
14309               || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
14310           && symbol_section_p (fixp->fx_addsy))
14311         fixp->fx_r_type = BFD_RELOC_32_PCREL;
14312       if (!object_64bit)
14313         {
14314           if (fixp->fx_r_type == BFD_RELOC_386_GOT32
14315               && fixp->fx_tcbit2)
14316             fixp->fx_r_type = BFD_RELOC_386_GOT32X;
14317         }
14318     }
14319 #endif
14320
14321   return 1;
14322 }
14323
14324 arelent *
14325 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
14326 {
14327   arelent *rel;
14328   bfd_reloc_code_real_type code;
14329
14330   switch (fixp->fx_r_type)
14331     {
14332 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14333       symbolS *sym;
14334
14335     case BFD_RELOC_SIZE32:
14336     case BFD_RELOC_SIZE64:
14337       if (fixp->fx_addsy
14338           && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
14339           && (!fixp->fx_subsy
14340               || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
14341         sym = fixp->fx_addsy;
14342       else if (fixp->fx_subsy
14343                && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
14344                && (!fixp->fx_addsy
14345                    || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
14346         sym = fixp->fx_subsy;
14347       else
14348         sym = NULL;
14349       if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
14350         {
14351           /* Resolve size relocation against local symbol to size of
14352              the symbol plus addend.  */
14353           valueT value = S_GET_SIZE (sym);
14354
14355           if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
14356             value = bfd_section_size (S_GET_SEGMENT (sym));
14357           if (sym == fixp->fx_subsy)
14358             {
14359               value = -value;
14360               if (fixp->fx_addsy)
14361                 value += S_GET_VALUE (fixp->fx_addsy);
14362             }
14363           else if (fixp->fx_subsy)
14364             value -= S_GET_VALUE (fixp->fx_subsy);
14365           value += fixp->fx_offset;
14366           if (fixp->fx_r_type == BFD_RELOC_SIZE32
14367               && object_64bit
14368               && !fits_in_unsigned_long (value))
14369             as_bad_where (fixp->fx_file, fixp->fx_line,
14370                           _("symbol size computation overflow"));
14371           fixp->fx_addsy = NULL;
14372           fixp->fx_subsy = NULL;
14373           md_apply_fix (fixp, (valueT *) &value, NULL);
14374           return NULL;
14375         }
14376       if (!fixp->fx_addsy || fixp->fx_subsy)
14377         {
14378           as_bad_where (fixp->fx_file, fixp->fx_line,
14379                         "unsupported expression involving @size");
14380           return NULL;
14381         }
14382 #endif
14383       /* Fall through.  */
14384
14385     case BFD_RELOC_X86_64_PLT32:
14386     case BFD_RELOC_X86_64_GOT32:
14387     case BFD_RELOC_X86_64_GOTPCREL:
14388     case BFD_RELOC_X86_64_GOTPCRELX:
14389     case BFD_RELOC_X86_64_REX_GOTPCRELX:
14390     case BFD_RELOC_386_PLT32:
14391     case BFD_RELOC_386_GOT32:
14392     case BFD_RELOC_386_GOT32X:
14393     case BFD_RELOC_386_GOTOFF:
14394     case BFD_RELOC_386_GOTPC:
14395     case BFD_RELOC_386_TLS_GD:
14396     case BFD_RELOC_386_TLS_LDM:
14397     case BFD_RELOC_386_TLS_LDO_32:
14398     case BFD_RELOC_386_TLS_IE_32:
14399     case BFD_RELOC_386_TLS_IE:
14400     case BFD_RELOC_386_TLS_GOTIE:
14401     case BFD_RELOC_386_TLS_LE_32:
14402     case BFD_RELOC_386_TLS_LE:
14403     case BFD_RELOC_386_TLS_GOTDESC:
14404     case BFD_RELOC_386_TLS_DESC_CALL:
14405     case BFD_RELOC_X86_64_TLSGD:
14406     case BFD_RELOC_X86_64_TLSLD:
14407     case BFD_RELOC_X86_64_DTPOFF32:
14408     case BFD_RELOC_X86_64_DTPOFF64:
14409     case BFD_RELOC_X86_64_GOTTPOFF:
14410     case BFD_RELOC_X86_64_TPOFF32:
14411     case BFD_RELOC_X86_64_TPOFF64:
14412     case BFD_RELOC_X86_64_GOTOFF64:
14413     case BFD_RELOC_X86_64_GOTPC32:
14414     case BFD_RELOC_X86_64_GOT64:
14415     case BFD_RELOC_X86_64_GOTPCREL64:
14416     case BFD_RELOC_X86_64_GOTPC64:
14417     case BFD_RELOC_X86_64_GOTPLT64:
14418     case BFD_RELOC_X86_64_PLTOFF64:
14419     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14420     case BFD_RELOC_X86_64_TLSDESC_CALL:
14421     case BFD_RELOC_RVA:
14422     case BFD_RELOC_VTABLE_ENTRY:
14423     case BFD_RELOC_VTABLE_INHERIT:
14424 #ifdef TE_PE
14425     case BFD_RELOC_32_SECREL:
14426     case BFD_RELOC_16_SECIDX:
14427 #endif
14428       code = fixp->fx_r_type;
14429       break;
14430     case BFD_RELOC_X86_64_32S:
14431       if (!fixp->fx_pcrel)
14432         {
14433           /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
14434           code = fixp->fx_r_type;
14435           break;
14436         }
14437       /* Fall through.  */
14438     default:
14439       if (fixp->fx_pcrel)
14440         {
14441           switch (fixp->fx_size)
14442             {
14443             default:
14444               as_bad_where (fixp->fx_file, fixp->fx_line,
14445                             _("can not do %d byte pc-relative relocation"),
14446                             fixp->fx_size);
14447               code = BFD_RELOC_32_PCREL;
14448               break;
14449             case 1: code = BFD_RELOC_8_PCREL;  break;
14450             case 2: code = BFD_RELOC_16_PCREL; break;
14451             case 4: code = BFD_RELOC_32_PCREL; break;
14452 #ifdef BFD64
14453             case 8: code = BFD_RELOC_64_PCREL; break;
14454 #endif
14455             }
14456         }
14457       else
14458         {
14459           switch (fixp->fx_size)
14460             {
14461             default:
14462               as_bad_where (fixp->fx_file, fixp->fx_line,
14463                             _("can not do %d byte relocation"),
14464                             fixp->fx_size);
14465               code = BFD_RELOC_32;
14466               break;
14467             case 1: code = BFD_RELOC_8;  break;
14468             case 2: code = BFD_RELOC_16; break;
14469             case 4: code = BFD_RELOC_32; break;
14470 #ifdef BFD64
14471             case 8: code = BFD_RELOC_64; break;
14472 #endif
14473             }
14474         }
14475       break;
14476     }
14477
14478   if ((code == BFD_RELOC_32
14479        || code == BFD_RELOC_32_PCREL
14480        || code == BFD_RELOC_X86_64_32S)
14481       && GOT_symbol
14482       && fixp->fx_addsy == GOT_symbol)
14483     {
14484       if (!object_64bit)
14485         code = BFD_RELOC_386_GOTPC;
14486       else
14487         code = BFD_RELOC_X86_64_GOTPC32;
14488     }
14489   if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
14490       && GOT_symbol
14491       && fixp->fx_addsy == GOT_symbol)
14492     {
14493       code = BFD_RELOC_X86_64_GOTPC64;
14494     }
14495
14496   rel = XNEW (arelent);
14497   rel->sym_ptr_ptr = XNEW (asymbol *);
14498   *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
14499
14500   rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
14501
14502   if (!use_rela_relocations)
14503     {
14504       /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
14505          vtable entry to be used in the relocation's section offset.  */
14506       if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
14507         rel->address = fixp->fx_offset;
14508 #if defined (OBJ_COFF) && defined (TE_PE)
14509       else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
14510         rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
14511       else
14512 #endif
14513       rel->addend = 0;
14514     }
14515   /* Use the rela in 64bit mode.  */
14516   else
14517     {
14518       if (disallow_64bit_reloc)
14519         switch (code)
14520           {
14521           case BFD_RELOC_X86_64_DTPOFF64:
14522           case BFD_RELOC_X86_64_TPOFF64:
14523           case BFD_RELOC_64_PCREL:
14524           case BFD_RELOC_X86_64_GOTOFF64:
14525           case BFD_RELOC_X86_64_GOT64:
14526           case BFD_RELOC_X86_64_GOTPCREL64:
14527           case BFD_RELOC_X86_64_GOTPC64:
14528           case BFD_RELOC_X86_64_GOTPLT64:
14529           case BFD_RELOC_X86_64_PLTOFF64:
14530             as_bad_where (fixp->fx_file, fixp->fx_line,
14531                           _("cannot represent relocation type %s in x32 mode"),
14532                           bfd_get_reloc_code_name (code));
14533             break;
14534           default:
14535             break;
14536           }
14537
14538       if (!fixp->fx_pcrel)
14539         rel->addend = fixp->fx_offset;
14540       else
14541         switch (code)
14542           {
14543           case BFD_RELOC_X86_64_PLT32:
14544           case BFD_RELOC_X86_64_GOT32:
14545           case BFD_RELOC_X86_64_GOTPCREL:
14546           case BFD_RELOC_X86_64_GOTPCRELX:
14547           case BFD_RELOC_X86_64_REX_GOTPCRELX:
14548           case BFD_RELOC_X86_64_TLSGD:
14549           case BFD_RELOC_X86_64_TLSLD:
14550           case BFD_RELOC_X86_64_GOTTPOFF:
14551           case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14552           case BFD_RELOC_X86_64_TLSDESC_CALL:
14553             rel->addend = fixp->fx_offset - fixp->fx_size;
14554             break;
14555           default:
14556             rel->addend = (section->vma
14557                            - fixp->fx_size
14558                            + fixp->fx_addnumber
14559                            + md_pcrel_from (fixp));
14560             break;
14561           }
14562     }
14563
14564   rel->howto = bfd_reloc_type_lookup (stdoutput, code);
14565   if (rel->howto == NULL)
14566     {
14567       as_bad_where (fixp->fx_file, fixp->fx_line,
14568                     _("cannot represent relocation type %s"),
14569                     bfd_get_reloc_code_name (code));
14570       /* Set howto to a garbage value so that we can keep going.  */
14571       rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
14572       gas_assert (rel->howto != NULL);
14573     }
14574
14575   return rel;
14576 }
14577
14578 #include "tc-i386-intel.c"
14579
14580 void
14581 tc_x86_parse_to_dw2regnum (expressionS *exp)
14582 {
14583   int saved_naked_reg;
14584   char saved_register_dot;
14585
14586   saved_naked_reg = allow_naked_reg;
14587   allow_naked_reg = 1;
14588   saved_register_dot = register_chars['.'];
14589   register_chars['.'] = '.';
14590   allow_pseudo_reg = 1;
14591   expression_and_evaluate (exp);
14592   allow_pseudo_reg = 0;
14593   register_chars['.'] = saved_register_dot;
14594   allow_naked_reg = saved_naked_reg;
14595
14596   if (exp->X_op == O_register && exp->X_add_number >= 0)
14597     {
14598       if ((addressT) exp->X_add_number < i386_regtab_size)
14599         {
14600           exp->X_op = O_constant;
14601           exp->X_add_number = i386_regtab[exp->X_add_number]
14602                               .dw2_regnum[flag_code >> 1];
14603         }
14604       else
14605         exp->X_op = O_illegal;
14606     }
14607 }
14608
14609 void
14610 tc_x86_frame_initial_instructions (void)
14611 {
14612   static unsigned int sp_regno[2];
14613
14614   if (!sp_regno[flag_code >> 1])
14615     {
14616       char *saved_input = input_line_pointer;
14617       char sp[][4] = {"esp", "rsp"};
14618       expressionS exp;
14619
14620       input_line_pointer = sp[flag_code >> 1];
14621       tc_x86_parse_to_dw2regnum (&exp);
14622       gas_assert (exp.X_op == O_constant);
14623       sp_regno[flag_code >> 1] = exp.X_add_number;
14624       input_line_pointer = saved_input;
14625     }
14626
14627   cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
14628   cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
14629 }
14630
14631 int
14632 x86_dwarf2_addr_size (void)
14633 {
14634 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14635   if (x86_elf_abi == X86_64_X32_ABI)
14636     return 4;
14637 #endif
14638   return bfd_arch_bits_per_address (stdoutput) / 8;
14639 }
14640
14641 int
14642 i386_elf_section_type (const char *str, size_t len)
14643 {
14644   if (flag_code == CODE_64BIT
14645       && len == sizeof ("unwind") - 1
14646       && startswith (str, "unwind"))
14647     return SHT_X86_64_UNWIND;
14648
14649   return -1;
14650 }
14651
14652 #ifdef TE_SOLARIS
14653 void
14654 i386_solaris_fix_up_eh_frame (segT sec)
14655 {
14656   if (flag_code == CODE_64BIT)
14657     elf_section_type (sec) = SHT_X86_64_UNWIND;
14658 }
14659 #endif
14660
14661 #ifdef TE_PE
14662 void
14663 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
14664 {
14665   expressionS exp;
14666
14667   exp.X_op = O_secrel;
14668   exp.X_add_symbol = symbol;
14669   exp.X_add_number = 0;
14670   emit_expr (&exp, size);
14671 }
14672 #endif
14673
14674 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14675 /* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */
14676
14677 bfd_vma
14678 x86_64_section_letter (int letter, const char **ptr_msg)
14679 {
14680   if (flag_code == CODE_64BIT)
14681     {
14682       if (letter == 'l')
14683         return SHF_X86_64_LARGE;
14684
14685       *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
14686     }
14687   else
14688     *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
14689   return -1;
14690 }
14691
14692 bfd_vma
14693 x86_64_section_word (char *str, size_t len)
14694 {
14695   if (len == 5 && flag_code == CODE_64BIT && startswith (str, "large"))
14696     return SHF_X86_64_LARGE;
14697
14698   return -1;
14699 }
14700
14701 static void
14702 handle_large_common (int small ATTRIBUTE_UNUSED)
14703 {
14704   if (flag_code != CODE_64BIT)
14705     {
14706       s_comm_internal (0, elf_common_parse);
14707       as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
14708     }
14709   else
14710     {
14711       static segT lbss_section;
14712       asection *saved_com_section_ptr = elf_com_section_ptr;
14713       asection *saved_bss_section = bss_section;
14714
14715       if (lbss_section == NULL)
14716         {
14717           flagword applicable;
14718           segT seg = now_seg;
14719           subsegT subseg = now_subseg;
14720
14721           /* The .lbss section is for local .largecomm symbols.  */
14722           lbss_section = subseg_new (".lbss", 0);
14723           applicable = bfd_applicable_section_flags (stdoutput);
14724           bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
14725           seg_info (lbss_section)->bss = 1;
14726
14727           subseg_set (seg, subseg);
14728         }
14729
14730       elf_com_section_ptr = &_bfd_elf_large_com_section;
14731       bss_section = lbss_section;
14732
14733       s_comm_internal (0, elf_common_parse);
14734
14735       elf_com_section_ptr = saved_com_section_ptr;
14736       bss_section = saved_bss_section;
14737     }
14738 }
14739 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */