1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/assembly_program.h"
14 #include "base/logging.h"
15 #include "base/memory/scoped_ptr.h"
17 #include "courgette/courgette.h"
18 #include "courgette/encoded_program.h"
22 // Opcodes of simple assembly language
24 ORIGIN
, // ORIGIN <rva> - set current address for assembly.
25 MAKEPERELOCS
, // Generates a base relocation table.
26 MAKEELFRELOCS
, // Generates a base relocation table.
27 DEFBYTE
, // DEFBYTE <value> - emit a byte literal.
28 REL32
, // REL32 <label> - emit a rel32 encoded reference to 'label'.
29 ABS32
, // REL32 <label> - emit am abs32 encoded reference to 'label'.
33 // Base class for instructions. Because we have so many instructions we want to
34 // keep them as small as possible. For this reason we avoid virtual functions.
38 OP
op() const { return static_cast<OP
>(op_
); }
41 explicit Instruction(OP op
) : op_(op
), info_(0) {}
42 Instruction(OP op
, unsigned int info
) : op_(op
), info_(info
) {}
44 uint32 op_
: 4; // A few bits to store the OP code.
45 uint32 info_
: 28; // Remaining bits in first word available to subclass.
48 DISALLOW_COPY_AND_ASSIGN(Instruction
);
53 // Sets the current address for the emitting instructions.
54 class OriginInstruction
: public Instruction
{
56 explicit OriginInstruction(RVA rva
) : Instruction(ORIGIN
, 0), rva_(rva
) {}
57 RVA
origin_rva() const { return rva_
; }
62 // Emits an entire PE base relocation table.
63 class PeRelocsInstruction
: public Instruction
{
65 PeRelocsInstruction() : Instruction(MAKEPERELOCS
) {}
68 // Emits an ELF relocation table.
69 class ElfRelocsInstruction
: public Instruction
{
71 ElfRelocsInstruction() : Instruction(MAKEELFRELOCS
) {}
74 // Emits a single byte.
75 class ByteInstruction
: public Instruction
{
77 explicit ByteInstruction(uint8 value
) : Instruction(DEFBYTE
, value
) {}
78 uint8
byte_value() const { return info_
; }
81 // A ABS32 to REL32 instruction emits a reference to a label's address.
82 class InstructionWithLabel
: public Instruction
{
84 InstructionWithLabel(OP op
, Label
* label
)
85 : Instruction(op
, 0), label_(label
) {
86 if (label
== NULL
) NOTREACHED();
88 Label
* label() const { return label_
; }
95 AssemblyProgram::AssemblyProgram()
99 static void DeleteContainedLabels(const RVAToLabel
& labels
) {
100 for (RVAToLabel::const_iterator p
= labels
.begin(); p
!= labels
.end(); ++p
)
104 AssemblyProgram::~AssemblyProgram() {
105 for (size_t i
= 0; i
< instructions_
.size(); ++i
) {
106 Instruction
* instruction
= instructions_
[i
];
107 if (instruction
->op() != DEFBYTE
) // Will be in byte_instruction_cache_.
110 if (byte_instruction_cache_
.get()) {
111 for (size_t i
= 0; i
< 256; ++i
)
112 delete byte_instruction_cache_
[i
];
114 DeleteContainedLabels(rel32_labels_
);
115 DeleteContainedLabels(abs32_labels_
);
118 CheckBool
AssemblyProgram::EmitPeRelocsInstruction() {
119 return Emit(new(std::nothrow
) PeRelocsInstruction());
122 CheckBool
AssemblyProgram::EmitElfRelocationInstruction() {
123 return Emit(new(std::nothrow
) ElfRelocsInstruction());
126 CheckBool
AssemblyProgram::EmitOriginInstruction(RVA rva
) {
127 return Emit(new(std::nothrow
) OriginInstruction(rva
));
130 CheckBool
AssemblyProgram::EmitByteInstruction(uint8 byte
) {
131 return Emit(GetByteInstruction(byte
));
134 CheckBool
AssemblyProgram::EmitRel32(Label
* label
) {
135 return Emit(new(std::nothrow
) InstructionWithLabel(REL32
, label
));
138 CheckBool
AssemblyProgram::EmitAbs32(Label
* label
) {
139 return Emit(new(std::nothrow
) InstructionWithLabel(ABS32
, label
));
142 Label
* AssemblyProgram::FindOrMakeAbs32Label(RVA rva
) {
143 return FindLabel(rva
, &abs32_labels_
);
146 Label
* AssemblyProgram::FindOrMakeRel32Label(RVA rva
) {
147 return FindLabel(rva
, &rel32_labels_
);
150 void AssemblyProgram::DefaultAssignIndexes() {
151 DefaultAssignIndexes(&abs32_labels_
);
152 DefaultAssignIndexes(&rel32_labels_
);
155 void AssemblyProgram::UnassignIndexes() {
156 UnassignIndexes(&abs32_labels_
);
157 UnassignIndexes(&rel32_labels_
);
160 void AssemblyProgram::AssignRemainingIndexes() {
161 AssignRemainingIndexes(&abs32_labels_
);
162 AssignRemainingIndexes(&rel32_labels_
);
165 Label
* AssemblyProgram::InstructionAbs32Label(
166 const Instruction
* instruction
) const {
167 if (instruction
->op() == ABS32
)
168 return static_cast<const InstructionWithLabel
*>(instruction
)->label();
172 Label
* AssemblyProgram::InstructionRel32Label(
173 const Instruction
* instruction
) const {
174 if (instruction
->op() == REL32
)
175 return static_cast<const InstructionWithLabel
*>(instruction
)->label();
179 CheckBool
AssemblyProgram::Emit(Instruction
* instruction
) {
182 bool ok
= instructions_
.push_back(instruction
);
188 Label
* AssemblyProgram::FindLabel(RVA rva
, RVAToLabel
* labels
) {
189 Label
*& slot
= (*labels
)[rva
];
191 slot
= new(std::nothrow
) Label(rva
);
196 void AssemblyProgram::UnassignIndexes(RVAToLabel
* labels
) {
197 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
198 Label
* current
= p
->second
;
199 current
->index_
= Label::kNoIndex
;
203 // DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
206 void AssemblyProgram::DefaultAssignIndexes(RVAToLabel
* labels
) {
208 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
209 Label
* current
= p
->second
;
210 if (current
->index_
!= Label::kNoIndex
)
212 current
->index_
= index
;
217 // AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
218 // yet assigned an index.
220 void AssemblyProgram::AssignRemainingIndexes(RVAToLabel
* labels
) {
221 // An address table compresses best when each index is associated with an
222 // address that is slight larger than the previous index.
224 // First see which indexes have not been used. The 'available' vector could
225 // grow even bigger, but the number of addresses is a better starting size
227 std::vector
<bool> available(labels
->size(), true);
230 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
231 int index
= p
->second
->index_
;
232 if (index
!= Label::kNoIndex
) {
233 while (static_cast<size_t>(index
) >= available
.size())
234 available
.push_back(true);
235 available
.at(index
) = false;
240 VLOG(1) << used
<< " of " << labels
->size() << " labels pre-assigned";
242 // Are there any unused labels that happen to be adjacent following a used
245 int fill_forward_count
= 0;
247 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
248 Label
* current
= p
->second
;
249 if (current
->index_
== Label::kNoIndex
) {
251 if (prev
&& prev
->index_
!= Label::kNoIndex
)
252 index
= prev
->index_
+ 1;
253 if (index
< static_cast<int>(available
.size()) && available
.at(index
)) {
254 current
->index_
= index
;
255 available
.at(index
) = false;
256 ++fill_forward_count
;
262 // Are there any unused labels that happen to be adjacent preceeding a used
265 int fill_backward_count
= 0;
267 for (RVAToLabel::reverse_iterator p
= labels
->rbegin();
270 Label
* current
= p
->second
;
271 if (current
->index_
== Label::kNoIndex
) {
274 prev_index
= prev
->index_
;
276 prev_index
= static_cast<uint32
>(available
.size());
277 if (prev_index
!= 0 &&
278 prev_index
!= Label::kNoIndex
&&
279 available
.at(prev_index
- 1)) {
280 current
->index_
= prev_index
- 1;
281 available
.at(current
->index_
) = false;
282 ++fill_backward_count
;
288 // Fill in any remaining indexes
289 int fill_infill_count
= 0;
291 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
292 Label
* current
= p
->second
;
293 if (current
->index_
== Label::kNoIndex
) {
294 while (!available
.at(index
)) {
297 current
->index_
= index
;
298 available
.at(index
) = false;
304 VLOG(1) << " fill forward " << fill_forward_count
305 << " backward " << fill_backward_count
306 << " infill " << fill_infill_count
;
309 typedef CheckBool (EncodedProgram::*DefineLabelMethod
)(int index
, RVA value
);
314 static CheckBool
DefineLabels(const RVAToLabel
& labels
,
315 EncodedProgram
* encoded_format
,
316 DefineLabelMethod define_label
) {
318 for (RVAToLabel::const_iterator p
= labels
.begin();
319 ok
&& p
!= labels
.end();
321 Label
* label
= p
->second
;
322 ok
= (encoded_format
->*define_label
)(label
->index_
, label
->rva_
);
327 EncodedProgram
* AssemblyProgram::Encode() const {
328 scoped_ptr
<EncodedProgram
> encoded(new(std::nothrow
) EncodedProgram());
332 encoded
->set_image_base(image_base_
);
334 if (!DefineLabels(abs32_labels_
, encoded
.get(),
335 &EncodedProgram::DefineAbs32Label
) ||
336 !DefineLabels(rel32_labels_
, encoded
.get(),
337 &EncodedProgram::DefineRel32Label
)) {
341 encoded
->EndLabels();
343 for (size_t i
= 0; i
< instructions_
.size(); ++i
) {
344 Instruction
* instruction
= instructions_
[i
];
346 switch (instruction
->op()) {
348 OriginInstruction
* org
= static_cast<OriginInstruction
*>(instruction
);
349 if (!encoded
->AddOrigin(org
->origin_rva()))
354 uint8 b
= static_cast<ByteInstruction
*>(instruction
)->byte_value();
355 if (!encoded
->AddCopy(1, &b
))
360 Label
* label
= static_cast<InstructionWithLabel
*>(instruction
)->label();
361 if (!encoded
->AddRel32(label
->index_
))
366 Label
* label
= static_cast<InstructionWithLabel
*>(instruction
)->label();
367 if (!encoded
->AddAbs32(label
->index_
))
372 if (!encoded
->AddPeMakeRelocs())
376 case MAKEELFRELOCS
: {
377 if (!encoded
->AddElfMakeRelocs())
382 NOTREACHED() << "Unknown Insn OP kind";
387 return encoded
.release();
390 Instruction
* AssemblyProgram::GetByteInstruction(uint8 byte
) {
391 if (!byte_instruction_cache_
.get()) {
392 byte_instruction_cache_
.reset(new(std::nothrow
) Instruction
*[256]);
393 if (!byte_instruction_cache_
.get())
396 for (int i
= 0; i
< 256; ++i
) {
397 byte_instruction_cache_
[i
] =
398 new(std::nothrow
) ByteInstruction(static_cast<uint8
>(i
));
399 if (!byte_instruction_cache_
[i
]) {
400 for (int j
= 0; j
< i
; ++j
)
401 delete byte_instruction_cache_
[j
];
402 byte_instruction_cache_
.reset();
408 return byte_instruction_cache_
[byte
];
411 ////////////////////////////////////////////////////////////////////////////////
413 Status
Encode(AssemblyProgram
* program
, EncodedProgram
** output
) {
415 EncodedProgram
*encoded
= program
->Encode();
420 return C_GENERAL_ERROR
;
424 } // namespace courgette