Refactor pinch-zoom viewport.
[chromium-blink-merge.git] / courgette / disassembler_elf_32_x86.cc
blob6e3935c0ed9b2313099bce50f0c9f9645341fc2d
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/disassembler_elf_32_x86.h"
7 #include <algorithm>
8 #include <string>
9 #include <vector>
11 #include "base/basictypes.h"
12 #include "base/logging.h"
14 #include "courgette/assembly_program.h"
15 #include "courgette/courgette.h"
16 #include "courgette/encoded_program.h"
18 namespace courgette {
20 DisassemblerElf32X86::DisassemblerElf32X86(const void* start, size_t length)
21 : Disassembler(start, length),
22 header_(NULL),
23 section_header_table_(NULL),
24 section_header_table_size_(0),
25 program_header_table_(NULL),
26 program_header_table_size_(0),
27 default_string_section_(NULL) {
30 bool DisassemblerElf32X86::ParseHeader() {
31 if (length() < sizeof(Elf32_Ehdr))
32 return Bad("Too small");
34 header_ = (Elf32_Ehdr *)start();
36 // Have magic for elf header?
37 if (header_->e_ident[0] != 0x7f ||
38 header_->e_ident[1] != 'E' ||
39 header_->e_ident[2] != 'L' ||
40 header_->e_ident[3] != 'F')
41 return Bad("No Magic Number");
43 if (header_->e_type != ET_EXEC &&
44 header_->e_type != ET_DYN)
45 return Bad("Not an executable file or shared library");
47 if (header_->e_machine != EM_386)
48 return Bad("Not a supported architecture");
50 if (header_->e_version != 1)
51 return Bad("Unknown file version");
53 if (header_->e_shentsize != sizeof(Elf32_Shdr))
54 return Bad("Unexpected section header size");
56 if (header_->e_shoff >= length())
57 return Bad("Out of bounds section header table offset");
59 section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff);
60 section_header_table_size_ = header_->e_shnum;
62 if ((header_->e_shoff + header_->e_shnum ) >= length())
63 return Bad("Out of bounds section header table");
65 if (header_->e_phoff >= length())
66 return Bad("Out of bounds program header table offset");
68 program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff);
69 program_header_table_size_ = header_->e_phnum;
71 if ((header_->e_phoff + header_->e_phnum) >= length())
72 return Bad("Out of bounds program header table");
74 default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx);
76 ReduceLength(DiscoverLength());
78 return Good();
81 bool DisassemblerElf32X86::Disassemble(AssemblyProgram* target) {
82 if (!ok())
83 return false;
85 // The Image Base is always 0 for ELF Executables
86 target->set_image_base(0);
88 if (!ParseAbs32Relocs())
89 return false;
91 if (!ParseRel32RelocsFromSections())
92 return false;
94 if (!ParseFile(target))
95 return false;
97 target->DefaultAssignIndexes();
99 return true;
102 uint32 DisassemblerElf32X86::DiscoverLength() {
103 uint32 result = 0;
105 // Find the end of the last section
106 for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
107 const Elf32_Shdr *section_header = SectionHeader(section_id);
109 if (section_header->sh_type == SHT_NOBITS)
110 continue;
112 uint32 section_end = section_header->sh_offset + section_header->sh_size;
114 if (section_end > result)
115 result = section_end;
118 // Find the end of the last segment
119 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
120 const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
122 uint32 segment_end = segment_header->p_offset + segment_header->p_filesz;
124 if (segment_end > result)
125 result = segment_end;
128 uint32 section_table_end = header_->e_shoff +
129 (header_->e_shnum * sizeof(Elf32_Shdr));
130 if (section_table_end > result)
131 result = section_table_end;
133 uint32 segment_table_end = header_->e_phoff +
134 (header_->e_phnum * sizeof(Elf32_Phdr));
135 if (segment_table_end > result)
136 result = segment_table_end;
138 return result;
141 CheckBool DisassemblerElf32X86::IsValidRVA(RVA rva) const {
143 // It's valid if it's contained in any program segment
144 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
145 const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
147 if (segment_header->p_type != PT_LOAD)
148 continue;
150 Elf32_Addr begin = segment_header->p_vaddr;
151 Elf32_Addr end = segment_header->p_vaddr + segment_header->p_memsz;
153 if (rva >= begin && rva < end)
154 return true;
157 return false;
160 // Convert an ELF relocation struction into an RVA
161 CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const {
163 // The rightmost byte of r_info is the type...
164 elf32_rel_386_type_values type =
165 (elf32_rel_386_type_values)(unsigned char)rel.r_info;
167 // The other 3 bytes of r_info are the symbol
168 uint32 symbol = rel.r_info >> 8;
170 switch(type)
172 case R_386_NONE:
173 case R_386_32:
174 case R_386_PC32:
175 case R_386_GOT32:
176 case R_386_PLT32:
177 case R_386_COPY:
178 case R_386_GLOB_DAT:
179 case R_386_JMP_SLOT:
180 return false;
182 case R_386_RELATIVE:
183 if (symbol != 0)
184 return false;
186 // This is a basic ABS32 relocation address
187 *result = rel.r_offset;
188 return true;
190 case R_386_GOTOFF:
191 case R_386_GOTPC:
192 case R_386_TLS_TPOFF:
193 return false;
196 return false;
199 // Returns RVA for an in memory address, or NULL.
200 CheckBool DisassemblerElf32X86::RVAToFileOffset(Elf32_Addr addr,
201 size_t* result) const {
203 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
204 Elf32_Addr begin = ProgramSegmentMemoryBegin(i);
205 Elf32_Addr end = begin + ProgramSegmentMemorySize(i);
207 if (addr >= begin && addr < end) {
208 Elf32_Addr offset = addr - begin;
210 if (offset < ProgramSegmentFileSize(i)) {
211 *result = ProgramSegmentFileOffset(i) + offset;
212 return true;
217 return false;
220 RVA DisassemblerElf32X86::FileOffsetToRVA(size_t offset) const {
221 // File offsets can be 64 bit values, but we are dealing with 32
222 // bit executables and so only need to support 32bit file sizes.
223 uint32 offset32 = (uint32)offset;
225 for (int i = 0; i < SectionHeaderCount(); i++) {
227 const Elf32_Shdr *section_header = SectionHeader(i);
229 // These can appear to have a size in the file, but don't.
230 if (section_header->sh_type == SHT_NOBITS)
231 continue;
233 Elf32_Off section_begin = section_header->sh_offset;
234 Elf32_Off section_end = section_begin + section_header->sh_size;
236 if (offset32 >= section_begin && offset32 < section_end) {
237 return section_header->sh_addr + (offset32 - section_begin);
241 return 0;
244 CheckBool DisassemblerElf32X86::RVAsToOffsets(std::vector<RVA>* rvas,
245 std::vector<size_t>* offsets) {
246 offsets->clear();
248 for (std::vector<RVA>::iterator rva = rvas->begin();
249 rva != rvas->end();
250 rva++) {
252 size_t offset;
254 if (!RVAToFileOffset(*rva, &offset))
255 return false;
257 offsets->push_back(offset);
260 return true;
263 CheckBool DisassemblerElf32X86::ParseFile(AssemblyProgram* program) {
264 // Walk all the bytes in the file, whether or not in a section.
265 uint32 file_offset = 0;
267 std::vector<size_t> abs_offsets;
268 std::vector<size_t> rel_offsets;
270 if (!RVAsToOffsets(&abs32_locations_, &abs_offsets))
271 return false;
273 if (!RVAsToOffsets(&rel32_locations_, &rel_offsets))
274 return false;
276 std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin();
277 std::vector<size_t>::iterator current_rel_offset = rel_offsets.begin();
279 std::vector<size_t>::iterator end_abs_offset = abs_offsets.end();
280 std::vector<size_t>::iterator end_rel_offset = rel_offsets.end();
282 for (int section_id = 0;
283 section_id < SectionHeaderCount();
284 section_id++) {
286 const Elf32_Shdr *section_header = SectionHeader(section_id);
288 if (!ParseSimpleRegion(file_offset,
289 section_header->sh_offset,
290 program))
291 return false;
292 file_offset = section_header->sh_offset;
294 switch (section_header->sh_type) {
295 case SHT_REL:
296 if (!ParseRelocationSection(section_header, program))
297 return false;
298 file_offset = section_header->sh_offset + section_header->sh_size;
299 break;
300 case SHT_PROGBITS:
301 if (!ParseProgbitsSection(section_header,
302 &current_abs_offset, end_abs_offset,
303 &current_rel_offset, end_rel_offset,
304 program))
305 return false;
306 file_offset = section_header->sh_offset + section_header->sh_size;
307 break;
308 default:
309 break;
313 // Rest of the file past the last section
314 if (!ParseSimpleRegion(file_offset,
315 length(),
316 program))
317 return false;
319 // Make certain we consume all of the relocations as expected
320 return (current_abs_offset == end_abs_offset);
323 CheckBool DisassemblerElf32X86::ParseRelocationSection(
324 const Elf32_Shdr *section_header,
325 AssemblyProgram* program) {
326 // We can reproduce the R_386_RELATIVE entries in one of the relocation
327 // table based on other information in the patch, given these
328 // conditions....
330 // All R_386_RELATIVE entries are:
331 // 1) In the same relocation table
332 // 2) Are consecutive
333 // 3) Are sorted in memory address order
335 // Happily, this is normally the case, but it's not required by spec
336 // so we check, and just don't do it if we don't match up.
338 // The expectation is that one relocation section will contain
339 // all of our R_386_RELATIVE entries in the expected order followed
340 // by assorted other entries we can't use special handling for.
342 bool match = true;
344 // Walk all the bytes in the section, matching relocation table or not
345 size_t file_offset = section_header->sh_offset;
346 size_t section_end = section_header->sh_offset + section_header->sh_size;
348 Elf32_Rel *section_relocs_iter =
349 (Elf32_Rel *)OffsetToPointer(section_header->sh_offset);
351 uint32 section_relocs_count = section_header->sh_size /
352 section_header->sh_entsize;
354 if (abs32_locations_.size() > section_relocs_count)
355 match = false;
357 std::vector<RVA>::iterator reloc_iter = abs32_locations_.begin();
359 while (match && (reloc_iter != abs32_locations_.end())) {
360 if (section_relocs_iter->r_info != R_386_RELATIVE ||
361 section_relocs_iter->r_offset != *reloc_iter)
362 match = false;
363 section_relocs_iter++;
364 reloc_iter++;
367 if (match) {
368 // Skip over relocation tables
369 if (!program->EmitElfRelocationInstruction())
370 return false;
371 file_offset += sizeof(Elf32_Rel) * abs32_locations_.size();
374 return ParseSimpleRegion(file_offset, section_end, program);
377 CheckBool DisassemblerElf32X86::ParseProgbitsSection(
378 const Elf32_Shdr *section_header,
379 std::vector<size_t>::iterator* current_abs_offset,
380 std::vector<size_t>::iterator end_abs_offset,
381 std::vector<size_t>::iterator* current_rel_offset,
382 std::vector<size_t>::iterator end_rel_offset,
383 AssemblyProgram* program) {
385 // Walk all the bytes in the file, whether or not in a section.
386 size_t file_offset = section_header->sh_offset;
387 size_t section_end = section_header->sh_offset + section_header->sh_size;
389 Elf32_Addr origin = section_header->sh_addr;
390 size_t origin_offset = section_header->sh_offset;
391 if (!program->EmitOriginInstruction(origin))
392 return false;
394 while (file_offset < section_end) {
396 if (*current_abs_offset != end_abs_offset &&
397 file_offset > **current_abs_offset)
398 return false;
400 while (*current_rel_offset != end_rel_offset &&
401 file_offset > **current_rel_offset) {
402 (*current_rel_offset)++;
405 size_t next_relocation = section_end;
407 if (*current_abs_offset != end_abs_offset &&
408 next_relocation > **current_abs_offset)
409 next_relocation = **current_abs_offset;
411 // Rel offsets are heuristically derived, and might (incorrectly) overlap
412 // an Abs value, or the end of the section, so +3 to make sure there is
413 // room for the full 4 byte value.
414 if (*current_rel_offset != end_rel_offset &&
415 next_relocation > (**current_rel_offset + 3))
416 next_relocation = **current_rel_offset;
418 if (next_relocation > file_offset) {
419 if (!ParseSimpleRegion(file_offset, next_relocation, program))
420 return false;
422 file_offset = next_relocation;
423 continue;
426 if (*current_abs_offset != end_abs_offset &&
427 file_offset == **current_abs_offset) {
429 const uint8* p = OffsetToPointer(file_offset);
430 RVA target_rva = Read32LittleEndian(p);
432 if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
433 return false;
434 file_offset += sizeof(RVA);
435 (*current_abs_offset)++;
436 continue;
439 if (*current_rel_offset != end_rel_offset &&
440 file_offset == **current_rel_offset) {
442 const uint8* p = OffsetToPointer(file_offset);
443 uint32 relative_target = Read32LittleEndian(p);
444 // This cast is for 64 bit systems, and is only safe because we
445 // are working on 32 bit executables.
446 RVA target_rva = (RVA)(origin + (file_offset - origin_offset) +
447 4 + relative_target);
449 if (!program->EmitRel32(program->FindOrMakeRel32Label(target_rva)))
450 return false;
451 file_offset += sizeof(RVA);
452 (*current_rel_offset)++;
453 continue;
457 // Rest of the section (if any)
458 return ParseSimpleRegion(file_offset, section_end, program);
461 CheckBool DisassemblerElf32X86::ParseSimpleRegion(
462 size_t start_file_offset,
463 size_t end_file_offset,
464 AssemblyProgram* program) {
466 const uint8* start = OffsetToPointer(start_file_offset);
467 const uint8* end = OffsetToPointer(end_file_offset);
469 const uint8* p = start;
471 while (p < end) {
472 if (!program->EmitByteInstruction(*p))
473 return false;
474 ++p;
477 return true;
480 CheckBool DisassemblerElf32X86::ParseAbs32Relocs() {
481 abs32_locations_.clear();
483 // Loop through sections for relocation sections
484 for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
485 const Elf32_Shdr *section_header = SectionHeader(section_id);
487 if (section_header->sh_type == SHT_REL) {
489 Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id);
491 int relocs_table_count = section_header->sh_size /
492 section_header->sh_entsize;
494 // Elf32_Word relocation_section_id = section_header->sh_info;
496 // Loop through relocation objects in the relocation section
497 for (int rel_id = 0; rel_id < relocs_table_count; rel_id++) {
498 RVA rva;
500 // Quite a few of these conversions fail, and we simply skip
501 // them, that's okay.
502 if (RelToRVA(relocs_table[rel_id], &rva))
503 abs32_locations_.push_back(rva);
508 std::sort(abs32_locations_.begin(), abs32_locations_.end());
509 return true;
512 CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSections() {
514 rel32_locations_.clear();
516 // Loop through sections for relocation sections
517 for (int section_id = 0;
518 section_id < SectionHeaderCount();
519 section_id++) {
521 const Elf32_Shdr *section_header = SectionHeader(section_id);
523 if (section_header->sh_type != SHT_PROGBITS)
524 continue;
526 if (!ParseRel32RelocsFromSection(section_header))
527 return false;
530 std::sort(rel32_locations_.begin(), rel32_locations_.end());
531 return true;
534 CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSection(
535 const Elf32_Shdr* section_header) {
537 uint32 start_file_offset = section_header->sh_offset;
538 uint32 end_file_offset = start_file_offset + section_header->sh_size;
540 const uint8* start_pointer = OffsetToPointer(start_file_offset);
541 const uint8* end_pointer = OffsetToPointer(end_file_offset);
543 // Quick way to convert from Pointer to RVA within a single Section is to
544 // subtract 'pointer_to_rva'.
545 const uint8* const adjust_pointer_to_rva = start_pointer -
546 section_header->sh_addr;
548 // Find the rel32 relocations.
549 const uint8* p = start_pointer;
550 while (p < end_pointer) {
551 //RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
553 // Heuristic discovery of rel32 locations in instruction stream: are the
554 // next few bytes the start of an instruction containing a rel32
555 // addressing mode?
556 const uint8* rel32 = NULL;
558 if (p + 5 <= end_pointer) {
559 if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32
560 rel32 = p + 1;
563 if (p + 6 <= end_pointer) {
564 if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form
565 if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely
566 rel32 = p + 2;
569 if (rel32) {
570 RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva);
572 RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32);
573 // To be valid, rel32 target must be within image, and within this
574 // section.
575 if (IsValidRVA(target_rva)) {
576 rel32_locations_.push_back(rel32_rva);
577 #if COURGETTE_HISTOGRAM_TARGETS
578 ++rel32_target_rvas_[target_rva];
579 #endif
580 p = rel32 + 4;
581 continue;
584 p += 1;
587 return true;
590 } // namespace courgette