tools/memory_watcher/mini_disassembler.cc

   1 /* Copyright (c) 2007, Google Inc.
   2  * All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are
   6  * met:
   7  *
   8  *     * Redistributions of source code must retain the above copyright
   9  * notice, this list of conditions and the following disclaimer.
  10  *     * Redistributions in binary form must reproduce the above
  11  * copyright notice, this list of conditions and the following disclaimer
  12  * in the documentation and/or other materials provided with the
  13  * distribution.
  14  *     * Neither the name of Google Inc. nor the names of its
  15  * contributors may be used to endorse or promote products derived from
  16  * this software without specific prior written permission.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29  *
  30  * ---
  31  *
  32  * Implementation of MiniDisassembler.
  33  */
  34
  35 #include "mini_disassembler.h"
  36
  37 namespace sidestep {
  38
  39 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
  40                                    bool address_default_is_32_bits)
  41     : operand_default_is_32_bits_(operand_default_is_32_bits),
  42       address_default_is_32_bits_(address_default_is_32_bits) {
  43   Initialize();
  44 }
  45
  46 MiniDisassembler::MiniDisassembler()
  47     : operand_default_is_32_bits_(true),
  48       address_default_is_32_bits_(true) {
  49   Initialize();
  50 }
  51
  52 InstructionType MiniDisassembler::Disassemble(
  53     unsigned char* start_byte,
  54     unsigned int& instruction_bytes) {
  55   // Clean up any state from previous invocations.
  56   Initialize();
  57
  58   // Start by processing any prefixes.
  59   unsigned char* current_byte = start_byte;
  60   unsigned int size = 0;
  61   InstructionType instruction_type = ProcessPrefixes(current_byte, size);
  62
  63   if (IT_UNKNOWN == instruction_type)
  64     return instruction_type;
  65
  66   current_byte += size;
  67   size = 0;
  68
  69   // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
  70   // and address_is_32_bits_ flags are correctly set.
  71
  72   instruction_type = ProcessOpcode(current_byte, 0, size);
  73
  74   // Check for error processing instruction
  75   if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
  76     return IT_UNKNOWN;
  77   }
  78
  79   current_byte += size;
  80
  81   // Invariant: operand_bytes_ indicates the total size of operands
  82   // specified by the opcode and/or ModR/M byte and/or SIB byte.
  83   // pCurrentByte points to the first byte after the ModR/M byte, or after
  84   // the SIB byte if it is present (i.e. the first byte of any operands
  85   // encoded in the instruction).
  86
  87   // We get the total length of any prefixes, the opcode, and the ModR/M and
  88   // SIB bytes if present, by taking the difference of the original starting
  89   // address and the current byte (which points to the first byte of the
  90   // operands if present, or to the first byte of the next instruction if
  91   // they are not).  Adding the count of bytes in the operands encoded in
  92   // the instruction gives us the full length of the instruction in bytes.
  93   instruction_bytes += operand_bytes_ + (current_byte - start_byte);
  94
  95   // Return the instruction type, which was set by ProcessOpcode().
  96   return instruction_type_;
  97 }
  98
  99 void MiniDisassembler::Initialize() {
 100   operand_is_32_bits_ = operand_default_is_32_bits_;
 101   address_is_32_bits_ = address_default_is_32_bits_;
 102   operand_bytes_ = 0;
 103   have_modrm_ = false;
 104   should_decode_modrm_ = false;
 105   instruction_type_ = IT_UNKNOWN;
 106   got_f2_prefix_ = false;
 107   got_f3_prefix_ = false;
 108   got_66_prefix_ = false;
 109 }
 110
 111 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
 112                                                   unsigned int& size) {
 113   InstructionType instruction_type = IT_GENERIC;
 114   const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
 115
 116   switch (opcode.type_) {
 117     case IT_PREFIX_ADDRESS:
 118       address_is_32_bits_ = !address_default_is_32_bits_;
 119       goto nochangeoperand;
 120     case IT_PREFIX_OPERAND:
 121       operand_is_32_bits_ = !operand_default_is_32_bits_;
 122       nochangeoperand:
 123     case IT_PREFIX:
 124
 125       if (0xF2 == (*start_byte))
 126         got_f2_prefix_ = true;
 127       else if (0xF3 == (*start_byte))
 128         got_f3_prefix_ = true;
 129       else if (0x66 == (*start_byte))
 130         got_66_prefix_ = true;
 131
 132       instruction_type = opcode.type_;
 133       size ++;
 134       // we got a prefix, so add one and check next byte
 135       ProcessPrefixes(start_byte + 1, size);
 136     default:
 137       break;   // not a prefix byte
 138   }
 139
 140   return instruction_type;
 141 }
 142
 143 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
 144                                                 unsigned int table_index,
 145                                                 unsigned int& size) {
 146   const OpcodeTable& table = s_ia32_opcode_map_[table_index];   // Get our table
 147   unsigned char current_byte = (*start_byte) >> table.shift_;
 148   current_byte = current_byte & table.mask_;  // Mask out the bits we will use
 149
 150   // Check whether the byte we have is inside the table we have.
 151   if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
 152     instruction_type_ = IT_UNKNOWN;
 153     return instruction_type_;
 154   }
 155
 156   const Opcode& opcode = table.table_[current_byte];
 157   if (IT_UNUSED == opcode.type_) {
 158     // This instruction is not used by the IA-32 ISA, so we indicate
 159     // this to the user.  Probably means that we were pointed to
 160     // a byte in memory that was not the start of an instruction.
 161     instruction_type_ = IT_UNUSED;
 162     return instruction_type_;
 163   } else if (IT_REFERENCE == opcode.type_) {
 164     // We are looking at an opcode that has more bytes (or is continued
 165     // in the ModR/M byte).  Recursively find the opcode definition in
 166     // the table for the opcode's next byte.
 167     size++;
 168     ProcessOpcode(start_byte + 1, opcode.table_index_, size);
 169     return instruction_type_;
 170   }
 171
 172   const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode;
 173   if (opcode.is_prefix_dependent_) {
 174     if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
 175       specific_opcode = &opcode.opcode_if_f2_prefix_;
 176     } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
 177       specific_opcode = &opcode.opcode_if_f3_prefix_;
 178     } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
 179       specific_opcode = &opcode.opcode_if_66_prefix_;
 180     }
 181   }
 182
 183   // Inv: The opcode type is known.
 184   instruction_type_ = specific_opcode->type_;
 185
 186   // Let's process the operand types to see if we have any immediate
 187   // operands, and/or a ModR/M byte.
 188
 189   ProcessOperand(specific_opcode->flag_dest_);
 190   ProcessOperand(specific_opcode->flag_source_);
 191   ProcessOperand(specific_opcode->flag_aux_);
 192
 193   // Inv: We have processed the opcode and incremented operand_bytes_
 194   // by the number of bytes of any operands specified by the opcode
 195   // that are stored in the instruction (not registers etc.).  Now
 196   // we need to return the total number of bytes for the opcode and
 197   // for the ModR/M or SIB bytes if they are present.
 198
 199   if (table.mask_ != 0xff) {
 200     if (have_modrm_) {
 201       // we're looking at a ModR/M byte so we're not going to
 202       // count that into the opcode size
 203       ProcessModrm(start_byte, size);
 204       return IT_GENERIC;
 205     } else {
 206       // need to count the ModR/M byte even if it's just being
 207       // used for opcode extension
 208       size++;
 209       return IT_GENERIC;
 210     }
 211   } else {
 212     if (have_modrm_) {
 213       // The ModR/M byte is the next byte.
 214       size++;
 215       ProcessModrm(start_byte + 1, size);
 216       return IT_GENERIC;
 217     } else {
 218       size++;
 219       return IT_GENERIC;
 220     }
 221   }
 222 }
 223
 224 bool MiniDisassembler::ProcessOperand(int flag_operand) {
 225   bool succeeded = true;
 226   if (AM_NOT_USED == flag_operand)
 227     return succeeded;
 228
 229   // Decide what to do based on the addressing mode.
 230   switch (flag_operand & AM_MASK) {
 231     // No ModR/M byte indicated by these addressing modes, and no
 232     // additional (e.g. immediate) parameters.
 233     case AM_A: // Direct address
 234     case AM_F: // EFLAGS register
 235     case AM_X: // Memory addressed by the DS:SI register pair
 236     case AM_Y: // Memory addressed by the ES:DI register pair
 237     case AM_IMPLICIT: // Parameter is implicit, occupies no space in
 238                        // instruction
 239       break;
 240
 241     // There is a ModR/M byte but it does not necessarily need
 242     // to be decoded.
 243     case AM_C: // reg field of ModR/M selects a control register
 244     case AM_D: // reg field of ModR/M selects a debug register
 245     case AM_G: // reg field of ModR/M selects a general register
 246     case AM_P: // reg field of ModR/M selects an MMX register
 247     case AM_R: // mod field of ModR/M may refer only to a general register
 248     case AM_S: // reg field of ModR/M selects a segment register
 249     case AM_T: // reg field of ModR/M selects a test register
 250     case AM_V: // reg field of ModR/M selects a 128-bit XMM register
 251       have_modrm_ = true;
 252       break;
 253
 254     // In these addressing modes, there is a ModR/M byte and it needs to be
 255     // decoded. No other (e.g. immediate) params than indicated in ModR/M.
 256     case AM_E: // Operand is either a general-purpose register or memory,
 257                  // specified by ModR/M byte
 258     case AM_M: // ModR/M byte will refer only to memory
 259     case AM_Q: // Operand is either an MMX register or memory (complex
 260                  // evaluation), specified by ModR/M byte
 261     case AM_W: // Operand is either a 128-bit XMM register or memory (complex
 262                  // eval), specified by ModR/M byte
 263       have_modrm_ = true;
 264       should_decode_modrm_ = true;
 265       break;
 266
 267     // These addressing modes specify an immediate or an offset value
 268     // directly, so we need to look at the operand type to see how many
 269     // bytes.
 270     case AM_I: // Immediate data.
 271     case AM_J: // Jump to offset.
 272     case AM_O: // Operand is at offset.
 273       switch (flag_operand & OT_MASK) {
 274         case OT_B: // Byte regardless of operand-size attribute.
 275           operand_bytes_ += OS_BYTE;
 276           break;
 277         case OT_C: // Byte or word, depending on operand-size attribute.
 278           if (operand_is_32_bits_)
 279             operand_bytes_ += OS_WORD;
 280           else
 281             operand_bytes_ += OS_BYTE;
 282           break;
 283         case OT_D: // Doubleword, regardless of operand-size attribute.
 284           operand_bytes_ += OS_DOUBLE_WORD;
 285           break;
 286         case OT_DQ: // Double-quadword, regardless of operand-size attribute.
 287           operand_bytes_ += OS_DOUBLE_QUAD_WORD;
 288           break;
 289         case OT_P: // 32-bit or 48-bit pointer, depending on operand-size
 290                      // attribute.
 291           if (operand_is_32_bits_)
 292             operand_bytes_ += OS_48_BIT_POINTER;
 293           else
 294             operand_bytes_ += OS_32_BIT_POINTER;
 295           break;
 296         case OT_PS: // 128-bit packed single-precision floating-point data.
 297           operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
 298           break;
 299         case OT_Q: // Quadword, regardless of operand-size attribute.
 300           operand_bytes_ += OS_QUAD_WORD;
 301           break;
 302         case OT_S: // 6-byte pseudo-descriptor.
 303           operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
 304           break;
 305         case OT_SD: // Scalar Double-Precision Floating-Point Value
 306         case OT_PD: // Unaligned packed double-precision floating point value
 307           operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
 308           break;
 309         case OT_SS:
 310           // Scalar element of a 128-bit packed single-precision
 311           // floating data.
 312           // We simply return enItUnknown since we don't have to support
 313           // floating point
 314           succeeded = false;
 315           break;
 316         case OT_V: // Word or doubleword, depending on operand-size attribute.
 317           if (operand_is_32_bits_)
 318             operand_bytes_ += OS_DOUBLE_WORD;
 319           else
 320             operand_bytes_ += OS_WORD;
 321           break;
 322         case OT_W: // Word, regardless of operand-size attribute.
 323           operand_bytes_ += OS_WORD;
 324           break;
 325
 326         // Can safely ignore these.
 327         case OT_A: // Two one-word operands in memory or two double-word
 328                      // operands in memory
 329         case OT_PI: // Quadword MMX technology register (e.g. mm0)
 330         case OT_SI: // Doubleword integer register (e.g., eax)
 331           break;
 332
 333         default:
 334           break;
 335       }
 336       break;
 337
 338     default:
 339       break;
 340   }
 341
 342   return succeeded;
 343 }
 344
 345 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
 346                                     unsigned int& size) {
 347   // If we don't need to decode, we just return the size of the ModR/M
 348   // byte (there is never a SIB byte in this case).
 349   if (!should_decode_modrm_) {
 350     size++;
 351     return true;
 352   }
 353
 354   // We never care about the reg field, only the combination of the mod
 355   // and r/m fields, so let's start by packing those fields together into
 356   // 5 bits.
 357   unsigned char modrm = (*start_byte);
 358   unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field
 359   modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field
 360   mod = mod >> 3; // shift the mod field to the right place
 361   modrm = mod | modrm; // combine the r/m and mod fields as discussed
 362   mod = mod >> 3; // shift the mod field to bits 2..0
 363
 364   // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
 365   // in bits 2..0, and mod contains the mod field in bits 2..0
 366
 367   const ModrmEntry* modrm_entry = 0;
 368   if (address_is_32_bits_)
 369     modrm_entry = &s_ia32_modrm_map_[modrm];
 370   else
 371     modrm_entry = &s_ia16_modrm_map_[modrm];
 372
 373   // Invariant: modrm_entry points to information that we need to decode
 374   // the ModR/M byte.
 375
 376   // Add to the count of operand bytes, if the ModR/M byte indicates
 377   // that some operands are encoded in the instruction.
 378   if (modrm_entry->is_encoded_in_instruction_)
 379     operand_bytes_ += modrm_entry->operand_size_;
 380
 381   // Process the SIB byte if necessary, and return the count
 382   // of ModR/M and SIB bytes.
 383   if (modrm_entry->use_sib_byte_) {
 384     size++;
 385     return ProcessSib(start_byte + 1, mod, size);
 386   } else {
 387     size++;
 388     return true;
 389   }
 390 }
 391
 392 bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
 393                                   unsigned char mod,
 394                                   unsigned int& size) {
 395   // get the mod field from the 2..0 bits of the SIB byte
 396   unsigned char sib_base = (*start_byte) & 0x07;
 397   if (0x05 == sib_base) {
 398     switch (mod) {
 399     case 0x00: // mod == 00
 400     case 0x02: // mod == 10
 401       operand_bytes_ += OS_DOUBLE_WORD;
 402       break;
 403     case 0x01: // mod == 01
 404       operand_bytes_ += OS_BYTE;
 405       break;
 406     case 0x03: // mod == 11
 407       // According to the IA-32 docs, there does not seem to be a disp
 408       // value for this value of mod
 409     default:
 410       break;
 411     }
 412   }
 413
 414   size++;
 415   return true;
 416 }
 417
 418 };  // namespace sidestep