1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef COURGETTE_ASSEMBLY_PROGRAM_H_
6 #define COURGETTE_ASSEMBLY_PROGRAM_H_
12 #include "base/basictypes.h"
13 #include "base/memory/scoped_ptr.h"
15 #include "courgette/disassembler.h"
16 #include "courgette/memory_allocator.h"
23 typedef NoThrowBuffer
<Instruction
*> InstructionVector
;
// A Label is a symbolic reference to an address.  Unlike in a conventional
// assembly language, we always know the address.  The address will later be
// stored in a table and the Label will be replaced with the index into the
// table.
//
// TODO(sra): Make fields private and add setters and getters.
33 static const int kNoIndex
= -1;
34 Label() : rva_(0), index_(kNoIndex
), count_(0) {}
35 explicit Label(RVA rva
) : rva_(rva
), index_(kNoIndex
), count_(0) {}
37 RVA rva_
; // Address referred to by the label.
38 int index_
; // Index of address in address table, kNoIndex until assigned.
42 typedef std::map
<RVA
, Label
*> RVAToLabel
;
// An AssemblyProgram is the result of disassembling an executable file.
//
// * The disassembler creates labels in the AssemblyProgram and emits
//   instructions.
// * The disassembler then calls DefaultAssignIndexes to assign
//   addresses to positions in the address tables.
// * [Optional step]
// * At this point the AssemblyProgram can be converted into an
//   EncodedProgram and serialized to an output stream.
// * Later, the EncodedProgram can be deserialized and assembled into
//   the original file.
//
// The optional step is to modify the AssemblyProgram.  One form of modification
// is to assign indexes in such a way as to make the EncodedProgram for this
// AssemblyProgram look more like the EncodedProgram for some other
// AssemblyProgram.  The modification process should call UnassignIndexes, do
// its own assignment, and then call AssignRemainingIndexes to ensure all
// indexes are assigned.
//
63 class AssemblyProgram
{
65 explicit AssemblyProgram(ExecutableType kind
);
68 ExecutableType
kind() const { return kind_
; }
70 void set_image_base(uint64 image_base
) { image_base_
= image_base
; }
72 // Instructions will be assembled in the order they are emitted.
74 // Generates an entire base relocation table.
75 CheckBool
EmitPeRelocsInstruction() WARN_UNUSED_RESULT
;
77 // Generates an ELF style relocation table for X86.
78 CheckBool
EmitElfRelocationInstruction() WARN_UNUSED_RESULT
;
80 // Generates an ELF style relocation table for ARM.
81 CheckBool
EmitElfARMRelocationInstruction() WARN_UNUSED_RESULT
;
83 // Following instruction will be assembled at address 'rva'.
84 CheckBool
EmitOriginInstruction(RVA rva
) WARN_UNUSED_RESULT
;
86 // Generates a single byte of data or machine instruction.
87 CheckBool
EmitByteInstruction(uint8 byte
) WARN_UNUSED_RESULT
;
89 // Generates multiple bytes of data or machine instructions.
90 CheckBool
EmitBytesInstruction(const uint8
* value
, size_t len
)
93 // Generates 4-byte relative reference to address of 'label'.
94 CheckBool
EmitRel32(Label
* label
) WARN_UNUSED_RESULT
;
96 // Generates 4-byte relative reference to address of 'label' for
98 CheckBool
EmitRel32ARM(uint16 op
, Label
* label
, const uint8
* arm_op
,
99 uint16 op_size
) WARN_UNUSED_RESULT
;
101 // Generates 4-byte absolute reference to address of 'label'.
102 CheckBool
EmitAbs32(Label
* label
) WARN_UNUSED_RESULT
;
104 // Generates 8-byte absolute reference to address of 'label'.
105 CheckBool
EmitAbs64(Label
* label
) WARN_UNUSED_RESULT
;
107 // Looks up a label or creates a new one. Might return NULL.
108 Label
* FindOrMakeAbs32Label(RVA rva
);
110 // Looks up a label or creates a new one. Might return NULL.
111 Label
* FindOrMakeRel32Label(RVA rva
);
113 void DefaultAssignIndexes();
114 void UnassignIndexes();
115 void AssignRemainingIndexes();
117 EncodedProgram
* Encode() const;
119 // Accessor for instruction list.
120 const InstructionVector
& instructions() const {
121 return instructions_
;
124 // Returns the label if the instruction contains an absolute 32-bit address,
125 // otherwise returns NULL.
126 Label
* InstructionAbs32Label(const Instruction
* instruction
) const;
128 // Returns the label if the instruction contains an absolute 64-bit address,
129 // otherwise returns NULL.
130 Label
* InstructionAbs64Label(const Instruction
* instruction
) const;
132 // Returns the label if the instruction contains a rel32 offset,
133 // otherwise returns NULL.
134 Label
* InstructionRel32Label(const Instruction
* instruction
) const;
136 // Trim underused labels
137 CheckBool
TrimLabels();
140 ExecutableType kind_
;
142 CheckBool
Emit(Instruction
* instruction
) WARN_UNUSED_RESULT
;
144 static const int kLabelLowerLimit
;
146 // Looks up a label or creates a new one. Might return NULL.
147 Label
* FindLabel(RVA rva
, RVAToLabel
* labels
);
149 // Helper methods for the public versions.
150 static void UnassignIndexes(RVAToLabel
* labels
);
151 static void DefaultAssignIndexes(RVAToLabel
* labels
);
152 static void AssignRemainingIndexes(RVAToLabel
* labels
);
154 // Sharing instructions that emit a single byte saves a lot of space.
155 Instruction
* GetByteInstruction(uint8 byte
);
156 scoped_ptr
<Instruction
*[]> byte_instruction_cache_
;
158 uint64 image_base_
; // Desired or mandated base address of image.
160 InstructionVector instructions_
; // All the instructions in program.
162 // These are lookup maps to find the label associated with a given address.
163 // We have separate label spaces for addresses referenced by rel32 labels and
164 // abs32 labels. This is somewhat arbitrary.
165 RVAToLabel rel32_labels_
;
166 RVAToLabel abs32_labels_
;
168 DISALLOW_COPY_AND_ASSIGN(AssemblyProgram
);
171 } // namespace courgette
172 #endif // COURGETTE_ASSEMBLY_PROGRAM_H_