1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/disassembler_elf_32.h"
11 #include "base/basictypes.h"
12 #include "base/logging.h"
13 #include "base/memory/scoped_vector.h"
15 #include "courgette/assembly_program.h"
16 #include "courgette/courgette.h"
17 #include "courgette/encoded_program.h"
21 DisassemblerElf32::DisassemblerElf32(const void* start
, size_t length
)
22 : Disassembler(start
, length
),
24 section_header_table_(NULL
),
25 section_header_table_size_(0),
26 program_header_table_(NULL
),
27 program_header_table_size_(0),
28 default_string_section_(NULL
) {
31 bool DisassemblerElf32::ParseHeader() {
32 if (length() < sizeof(Elf32_Ehdr
))
33 return Bad("Too small");
35 header_
= (Elf32_Ehdr
*)start();
37 // Have magic for elf header?
38 if (header_
->e_ident
[0] != 0x7f ||
39 header_
->e_ident
[1] != 'E' ||
40 header_
->e_ident
[2] != 'L' ||
41 header_
->e_ident
[3] != 'F')
42 return Bad("No Magic Number");
44 if (header_
->e_type
!= ET_EXEC
&&
45 header_
->e_type
!= ET_DYN
)
46 return Bad("Not an executable file or shared library");
48 if (header_
->e_machine
!= ElfEM())
49 return Bad("Not a supported architecture");
51 if (header_
->e_version
!= 1)
52 return Bad("Unknown file version");
54 if (header_
->e_shentsize
!= sizeof(Elf32_Shdr
))
55 return Bad("Unexpected section header size");
57 if (header_
->e_shoff
>= length())
58 return Bad("Out of bounds section header table offset");
60 section_header_table_
= (Elf32_Shdr
*)OffsetToPointer(header_
->e_shoff
);
61 section_header_table_size_
= header_
->e_shnum
;
63 if ((header_
->e_shoff
+ header_
->e_shnum
) >= length())
64 return Bad("Out of bounds section header table");
66 if (header_
->e_phoff
>= length())
67 return Bad("Out of bounds program header table offset");
69 program_header_table_
= (Elf32_Phdr
*)OffsetToPointer(header_
->e_phoff
);
70 program_header_table_size_
= header_
->e_phnum
;
72 if ((header_
->e_phoff
+ header_
->e_phnum
) >= length())
73 return Bad("Out of bounds program header table");
75 default_string_section_
= (const char *)SectionBody((int)header_
->e_shstrndx
);
77 ReduceLength(DiscoverLength());
// Disassembles the ELF image into |target|.  The steps visible here, in
// order: set the image base to 0, collect the abs32 relocations, scan the
// sections for rel32 locations, emit the file contents through ParseFile(),
// and finally let |target| assign default indexes.
// NOTE(review): the statements guarded by the three if-checks below, and the
// final return, are not visible in this listing; presumably each failed step
// returns false and success returns true -- confirm against the full file.
82 bool DisassemblerElf32::Disassemble(AssemblyProgram
* target
) {
86 // The Image Base is always 0 for ELF Executables
87 target
->set_image_base(0);
89 if (!ParseAbs32Relocs())
92 if (!ParseRel32RelocsFromSections())
95 if (!ParseFile(target
))
98 target
->DefaultAssignIndexes();
103 uint32
DisassemblerElf32::DiscoverLength() {
106 // Find the end of the last section
107 for (int section_id
= 0; section_id
< SectionHeaderCount(); section_id
++) {
108 const Elf32_Shdr
*section_header
= SectionHeader(section_id
);
110 if (section_header
->sh_type
== SHT_NOBITS
)
113 uint32 section_end
= section_header
->sh_offset
+ section_header
->sh_size
;
115 if (section_end
> result
)
116 result
= section_end
;
119 // Find the end of the last segment
120 for (int i
= 0; i
< ProgramSegmentHeaderCount(); i
++) {
121 const Elf32_Phdr
*segment_header
= ProgramSegmentHeader(i
);
123 uint32 segment_end
= segment_header
->p_offset
+ segment_header
->p_filesz
;
125 if (segment_end
> result
)
126 result
= segment_end
;
129 uint32 section_table_end
= header_
->e_shoff
+
130 (header_
->e_shnum
* sizeof(Elf32_Shdr
));
131 if (section_table_end
> result
)
132 result
= section_table_end
;
134 uint32 segment_table_end
= header_
->e_phoff
+
135 (header_
->e_phnum
* sizeof(Elf32_Phdr
));
136 if (segment_table_end
> result
)
137 result
= segment_table_end
;
142 CheckBool
DisassemblerElf32::IsValidRVA(RVA rva
) const {
144 // It's valid if it's contained in any program segment
145 for (int i
= 0; i
< ProgramSegmentHeaderCount(); i
++) {
146 const Elf32_Phdr
*segment_header
= ProgramSegmentHeader(i
);
148 if (segment_header
->p_type
!= PT_LOAD
)
151 Elf32_Addr begin
= segment_header
->p_vaddr
;
152 Elf32_Addr end
= segment_header
->p_vaddr
+ segment_header
->p_memsz
;
154 if (rva
>= begin
&& rva
< end
)
161 // Returns RVA for an in memory address, or NULL.
162 CheckBool
DisassemblerElf32::RVAToFileOffset(Elf32_Addr addr
,
163 size_t* result
) const {
165 for (int i
= 0; i
< ProgramSegmentHeaderCount(); i
++) {
166 Elf32_Addr begin
= ProgramSegmentMemoryBegin(i
);
167 Elf32_Addr end
= begin
+ ProgramSegmentMemorySize(i
);
169 if (addr
>= begin
&& addr
< end
) {
170 Elf32_Addr offset
= addr
- begin
;
172 if (offset
< ProgramSegmentFileSize(i
)) {
173 *result
= ProgramSegmentFileOffset(i
) + offset
;
// Maps the file offset |offset| to an RVA by searching the section headers
// for a section, other than an SHT_NOBITS one, whose file range
// [sh_offset, sh_offset + sh_size) contains the offset.  For such a section
// the result is sh_addr + (offset - sh_offset).
// NOTE(review): the statement guarded by the SHT_NOBITS check and the value
// returned when no section matches are not visible in this listing --
// confirm against the full file.
182 RVA
DisassemblerElf32::FileOffsetToRVA(size_t offset
) const {
183 // File offsets can be 64 bit values, but we are dealing with 32
184 // bit executables and so only need to support 32bit file sizes.
185 uint32 offset32
= (uint32
)offset
;
187 for (int i
= 0; i
< SectionHeaderCount(); i
++) {
189 const Elf32_Shdr
*section_header
= SectionHeader(i
);
191 // These can appear to have a size in the file, but don't.
192 if (section_header
->sh_type
== SHT_NOBITS
)
195 Elf32_Off section_begin
= section_header
->sh_offset
;
196 Elf32_Off section_end
= section_begin
+ section_header
->sh_size
;
198 if (offset32
>= section_begin
&& offset32
< section_end
) {
199 return section_header
->sh_addr
+ (offset32
- section_begin
);
206 CheckBool
DisassemblerElf32::RVAsToOffsets(std::vector
<RVA
>* rvas
,
207 std::vector
<size_t>* offsets
) {
210 for (std::vector
<RVA
>::iterator rva
= rvas
->begin();
216 if (!RVAToFileOffset(*rva
, &offset
))
219 offsets
->push_back(offset
);
225 CheckBool
DisassemblerElf32::RVAsToOffsets(ScopedVector
<TypedRVA
>* rvas
) {
226 for (ScopedVector
<TypedRVA
>::iterator rva
= rvas
->begin();
232 if (!RVAToFileOffset((*rva
)->rva(), &offset
))
235 (*rva
)->set_offset(offset
);
// Emits the file contents into |program|.  The abs32 and rel32 locations are
// first converted to file offsets.  Then, for each section header in table
// order, the bytes between the current file offset and the section's
// sh_offset go through ParseSimpleRegion(), and the section is dispatched on
// its sh_type.  After the loop the remainder of the file is handed to
// ParseSimpleRegion(), and the function reports whether every abs32 offset
// was consumed.
// NOTE(review): the case labels, the statements guarded by the if-checks, the
// loop increment and some trailing call arguments are not visible in this
// listing -- confirm against the full file before relying on these comments.
241 CheckBool
DisassemblerElf32::ParseFile(AssemblyProgram
* program
) {
242 // Walk all the bytes in the file, whether or not in a section.
243 uint32 file_offset
= 0;
245 std::vector
<size_t> abs_offsets
;
// Translate both relocation lists from RVAs to file offsets up front.
247 if (!RVAsToOffsets(&abs32_locations_
, &abs_offsets
))
250 if (!RVAsToOffsets(&rel32_locations_
))
// Cursors into the two relocation lists; they are passed by address to
// ParseProgbitsSection() below.
253 std::vector
<size_t>::iterator current_abs_offset
= abs_offsets
.begin();
254 ScopedVector
<TypedRVA
>::iterator current_rel
= rel32_locations_
.begin();
256 std::vector
<size_t>::iterator end_abs_offset
= abs_offsets
.end();
257 ScopedVector
<TypedRVA
>::iterator end_rel
= rel32_locations_
.end();
259 for (int section_id
= 0;
260 section_id
< SectionHeaderCount();
263 const Elf32_Shdr
*section_header
= SectionHeader(section_id
);
// Bytes lying between the previous position and the start of this section.
265 if (!ParseSimpleRegion(file_offset
,
266 section_header
->sh_offset
,
269 file_offset
= section_header
->sh_offset
;
271 switch (section_header
->sh_type
) {
// Relocation section: handled by ParseRelocationSection(), after which the
// position moves to the end of the section.
273 if (!ParseRelocationSection(section_header
, program
))
275 file_offset
= section_header
->sh_offset
+ section_header
->sh_size
;
// NOTE(review): "¤t_" on the next lines looks like a mis-decoded "&current_"
// (i.e. &current_abs_offset and &current_rel) -- confirm against the full
// file.
278 if (!ParseProgbitsSection(section_header
,
279 ¤t_abs_offset
, end_abs_offset
,
280 ¤t_rel
, end_rel
,
283 file_offset
= section_header
->sh_offset
+ section_header
->sh_size
;
290 while (current_abs_offset
!= end_abs_offset
&&
291 *current_abs_offset
>= section_header
->sh_offset
&&
292 *current_abs_offset
<
293 (section_header
->sh_offset
+ section_header
->sh_size
)) {
294 // Skip any abs_offsets that appear in the unsupported INIT_ARRAY section
295 VLOG(1) << "Skipping relocation entry for unsupported section: " <<
296 section_header
->sh_type
;
297 current_abs_offset
++;
// Only logs when the next abs32 offset falls inside this section; the cursor
// is not advanced here.
301 if (current_abs_offset
!= end_abs_offset
&&
302 *current_abs_offset
>= section_header
->sh_offset
&&
303 *current_abs_offset
<
304 (section_header
->sh_offset
+ section_header
->sh_size
))
305 VLOG(1) << "Relocation address in unrecognized ELF section: " << \
306 section_header
->sh_type
;
311 // Rest of the file past the last section
312 if (!ParseSimpleRegion(file_offset
,
317 // Make certain we consume all of the relocations as expected
318 return (current_abs_offset
== end_abs_offset
);
// Emits one section into |program|.  An origin instruction for the section's
// sh_addr is emitted first.  The section's file range is then walked: runs of
// bytes up to the next relocation go through ParseSimpleRegion(), an abs32
// location is emitted with EmitAbs32() on a label for the 32-bit value read
// there, and a rel32 location is emitted through the TypedRVA's
// EmitInstruction().  |*current_abs_offset| and |*current_rel| are the
// caller's cursors into the abs32 offset list and the rel32 list.
// NOTE(review): several statement bodies (the results of failed checks, the
// loop continuations, the advance of |*current_rel|) are not visible in this
// listing -- confirm against the full file.
321 CheckBool
DisassemblerElf32::ParseProgbitsSection(
322 const Elf32_Shdr
*section_header
,
323 std::vector
<size_t>::iterator
* current_abs_offset
,
324 std::vector
<size_t>::iterator end_abs_offset
,
325 ScopedVector
<TypedRVA
>::iterator
* current_rel
,
326 ScopedVector
<TypedRVA
>::iterator end_rel
,
327 AssemblyProgram
* program
) {
329 // Walk the bytes of this section, from sh_offset to sh_offset + sh_size.
330 size_t file_offset
= section_header
->sh_offset
;
331 size_t section_end
= section_header
->sh_offset
+ section_header
->sh_size
;
// |origin| is the section's address and |origin_offset| its file offset;
// together they convert a file offset inside the section into an RVA.
333 Elf32_Addr origin
= section_header
->sh_addr
;
334 size_t origin_offset
= section_header
->sh_offset
;
335 if (!program
->EmitOriginInstruction(origin
))
338 while (file_offset
< section_end
) {
// The next abs32 offset lies behind the current position.
340 if (*current_abs_offset
!= end_abs_offset
&&
341 file_offset
> **current_abs_offset
)
// Loop over rel32 entries whose offset lies behind the current position.
344 while (*current_rel
!= end_rel
&&
345 file_offset
> (**current_rel
)->get_offset()) {
// Find the nearest upcoming relocation, defaulting to the section end.
349 size_t next_relocation
= section_end
;
351 if (*current_abs_offset
!= end_abs_offset
&&
352 next_relocation
> **current_abs_offset
)
353 next_relocation
= **current_abs_offset
;
355 // Rel offsets are heuristically derived, and might (incorrectly) overlap
356 // an Abs value, or the end of the section, so +3 to make sure there is
357 // room for the full 4 byte value.
358 if (*current_rel
!= end_rel
&&
359 next_relocation
> ((**current_rel
)->get_offset() + 3))
360 next_relocation
= (**current_rel
)->get_offset();
// Plain bytes before the next relocation.
362 if (next_relocation
> file_offset
) {
363 if (!ParseSimpleRegion(file_offset
, next_relocation
, program
))
366 file_offset
= next_relocation
;
// Positioned exactly on an abs32 location: read the 32-bit little-endian
// value stored there and emit it as an abs32 label reference.
370 if (*current_abs_offset
!= end_abs_offset
&&
371 file_offset
== **current_abs_offset
) {
373 const uint8
* p
= OffsetToPointer(file_offset
);
374 RVA target_rva
= Read32LittleEndian(p
);
376 if (!program
->EmitAbs32(program
->FindOrMakeAbs32Label(target_rva
)))
378 file_offset
+= sizeof(RVA
);
379 (*current_abs_offset
)++;
// Positioned exactly on a rel32 location.
383 if (*current_rel
!= end_rel
&&
384 file_offset
== (**current_rel
)->get_offset()) {
386 uint32 relative_target
= (**current_rel
)->relative_target();
387 // This cast is for 64 bit systems, and is only safe because we
388 // are working on 32 bit executables.
389 RVA target_rva
= (RVA
)(origin
+ (file_offset
- origin_offset
) +
392 if (! (**current_rel
)->EmitInstruction(program
, target_rva
))
394 file_offset
+= (**current_rel
)->op_size();
400 // Rest of the section (if any)
401 return ParseSimpleRegion(file_offset
, section_end
, program
);
404 CheckBool
DisassemblerElf32::ParseSimpleRegion(
405 size_t start_file_offset
,
406 size_t end_file_offset
,
407 AssemblyProgram
* program
) {
409 const uint8
* start
= OffsetToPointer(start_file_offset
);
410 const uint8
* end
= OffsetToPointer(end_file_offset
);
412 // Callers don't guarantee start < end
413 if (start
>= end
) return true;
415 const ptrdiff_t len
= end
- start
; // Works because vars are byte pointers
417 if (!program
->EmitBytesInstruction(start
, len
))
423 CheckBool
DisassemblerElf32::ParseAbs32Relocs() {
424 abs32_locations_
.clear();
426 // Loop through sections for relocation sections
427 for (int section_id
= 0; section_id
< SectionHeaderCount(); section_id
++) {
428 const Elf32_Shdr
*section_header
= SectionHeader(section_id
);
430 if (section_header
->sh_type
== SHT_REL
) {
432 Elf32_Rel
*relocs_table
= (Elf32_Rel
*)SectionBody(section_id
);
434 int relocs_table_count
= section_header
->sh_size
/
435 section_header
->sh_entsize
;
437 // Elf32_Word relocation_section_id = section_header->sh_info;
439 // Loop through relocation objects in the relocation section
440 for (int rel_id
= 0; rel_id
< relocs_table_count
; rel_id
++) {
443 // Quite a few of these conversions fail, and we simply skip
444 // them, that's okay.
445 if (RelToRVA(relocs_table
[rel_id
], &rva
) && CheckSection(rva
))
446 abs32_locations_
.push_back(rva
);
451 std::sort(abs32_locations_
.begin(), abs32_locations_
.end());
// Checks the section that holds |rva|.  The RVA is converted to a file
// offset, then the section headers are searched for a section whose file
// range [sh_offset, sh_offset + sh_size) contains that offset, and the
// decision is made by a switch on that section's sh_type.
// NOTE(review): the declaration of |offset|, the loop increment, the body of
// the switch and every return statement are not visible in this listing, so
// which section types are accepted cannot be stated from here -- confirm
// against the full file.
455 CheckBool
DisassemblerElf32::CheckSection(RVA rva
) {
458 if (!RVAToFileOffset(rva
, &offset
)) {
462 for (int section_id
= 0;
463 section_id
< SectionHeaderCount();
466 const Elf32_Shdr
*section_header
= SectionHeader(section_id
);
// Does this section's file range contain the offset?
468 if (offset
>= section_header
->sh_offset
&&
469 offset
< (section_header
->sh_offset
+ section_header
->sh_size
)) {
470 switch (section_header
->sh_type
) {
482 CheckBool
DisassemblerElf32::ParseRel32RelocsFromSections() {
484 rel32_locations_
.clear();
486 // Loop through sections for relocation sections
487 for (int section_id
= 0;
488 section_id
< SectionHeaderCount();
491 const Elf32_Shdr
*section_header
= SectionHeader(section_id
);
493 if (section_header
->sh_type
!= SHT_PROGBITS
)
496 if (!ParseRel32RelocsFromSection(section_header
))
500 std::sort(rel32_locations_
.begin(),
501 rel32_locations_
.end(),
502 TypedRVA::IsLessThan
);
506 } // namespace courgette