1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/assembly_program.h"
14 #include "base/logging.h"
15 #include "base/memory/scoped_ptr.h"
17 #include "courgette/courgette.h"
18 #include "courgette/encoded_program.h"
22 // Opcodes of simple assembly language
24 ORIGIN
, // ORIGIN <rva> - set current address for assembly.
25 MAKEPERELOCS
, // Generates a base relocation table.
26 MAKEELFRELOCS
, // Generates a base relocation table.
27 DEFBYTE
, // DEFBYTE <value> - emit a byte literal.
28 REL32
, // REL32 <label> - emit a rel32 encoded reference to 'label'.
29 ABS32
, // ABS32 <label> - emit an abs32 encoded reference to 'label'.
30 REL32ARM
, // REL32ARM <c_op> <label> - arm-specific rel32 reference
31 MAKEELFARMRELOCS
, // Generates a base relocation table.
32 DEFBYTES
, // Emits any number of byte literals
33 ABS64
, // ABS64 <label> - emit an abs64 encoded reference to 'label'.
37 // Base class for instructions. Because we have so many instructions we want to
38 // keep them as small as possible. For this reason we avoid virtual functions.
42 OP
op() const { return static_cast<OP
>(op_
); }
45 explicit Instruction(OP op
) : op_(op
), info_(0) {}
46 Instruction(OP op
, unsigned int info
) : op_(op
), info_(info
) {}
48 uint32 op_
: 4; // A few bits to store the OP code.
49 uint32 info_
: 28; // Remaining bits in first word available to subclass.
52 DISALLOW_COPY_AND_ASSIGN(Instruction
);
57 // Sets the current address for the emitting instructions.
58 class OriginInstruction
: public Instruction
{
60 explicit OriginInstruction(RVA rva
) : Instruction(ORIGIN
, 0), rva_(rva
) {}
61 RVA
origin_rva() const { return rva_
; }
66 // Emits an entire PE base relocation table.
67 class PeRelocsInstruction
: public Instruction
{
69 PeRelocsInstruction() : Instruction(MAKEPERELOCS
) {}
72 // Emits an ELF relocation table.
73 class ElfRelocsInstruction
: public Instruction
{
75 ElfRelocsInstruction() : Instruction(MAKEELFRELOCS
) {}
78 // Emits an ELF ARM relocation table.
79 class ElfARMRelocsInstruction
: public Instruction
{
81 ElfARMRelocsInstruction() : Instruction(MAKEELFARMRELOCS
) {}
84 // Emits a single byte.
85 class ByteInstruction
: public Instruction
{
87 explicit ByteInstruction(uint8 value
) : Instruction(DEFBYTE
, value
) {}
88 uint8
byte_value() const { return info_
; }
91 // Emits a single byte.
92 class BytesInstruction
: public Instruction
{
94 BytesInstruction(const uint8
* values
, size_t len
)
95 : Instruction(DEFBYTES
, 0),
98 const uint8
* byte_values() const { return values_
; }
99 size_t len() const { return len_
; }
102 const uint8
* values_
;
106 // A ABS32 to REL32 instruction emits a reference to a label's address.
107 class InstructionWithLabel
: public Instruction
{
109 InstructionWithLabel(OP op
, Label
* label
)
110 : Instruction(op
, 0), label_(label
) {
111 if (label
== NULL
) NOTREACHED();
113 Label
* label() const { return label_
; }
118 // An ARM REL32 instruction emits a reference to a label's address and
119 // a specially-compressed ARM op.
120 class InstructionWithLabelARM
: public InstructionWithLabel
{
122 InstructionWithLabelARM(OP op
, uint16 compressed_op
, Label
* label
,
123 const uint8
* arm_op
, uint16 op_size
)
124 : InstructionWithLabel(op
, label
), compressed_op_(compressed_op
),
125 arm_op_(arm_op
), op_size_(op_size
) {
126 if (label
== NULL
) NOTREACHED();
128 uint16
compressed_op() const { return compressed_op_
; }
129 const uint8
* arm_op() const { return arm_op_
; }
130 uint16
op_size() const { return op_size_
; }
132 uint16 compressed_op_
;
133 const uint8
* arm_op_
;
139 AssemblyProgram::AssemblyProgram(ExecutableType kind
)
140 : kind_(kind
), image_base_(0) {
143 static void DeleteContainedLabels(const RVAToLabel
& labels
) {
144 for (RVAToLabel::const_iterator p
= labels
.begin(); p
!= labels
.end(); ++p
)
148 AssemblyProgram::~AssemblyProgram() {
149 for (size_t i
= 0; i
< instructions_
.size(); ++i
) {
150 Instruction
* instruction
= instructions_
[i
];
151 if (instruction
->op() != DEFBYTE
) // Will be in byte_instruction_cache_.
154 if (byte_instruction_cache_
.get()) {
155 for (size_t i
= 0; i
< 256; ++i
)
156 delete byte_instruction_cache_
[i
];
158 DeleteContainedLabels(rel32_labels_
);
159 DeleteContainedLabels(abs32_labels_
);
162 CheckBool
AssemblyProgram::EmitPeRelocsInstruction() {
163 return Emit(new(std::nothrow
) PeRelocsInstruction());
166 CheckBool
AssemblyProgram::EmitElfRelocationInstruction() {
167 return Emit(new(std::nothrow
) ElfRelocsInstruction());
170 CheckBool
AssemblyProgram::EmitElfARMRelocationInstruction() {
171 return Emit(new(std::nothrow
) ElfARMRelocsInstruction());
174 CheckBool
AssemblyProgram::EmitOriginInstruction(RVA rva
) {
175 return Emit(new(std::nothrow
) OriginInstruction(rva
));
178 CheckBool
AssemblyProgram::EmitByteInstruction(uint8 byte
) {
179 return Emit(GetByteInstruction(byte
));
182 CheckBool
AssemblyProgram::EmitBytesInstruction(const uint8
* values
,
184 return Emit(new(std::nothrow
) BytesInstruction(values
, len
));
187 CheckBool
AssemblyProgram::EmitRel32(Label
* label
) {
188 return Emit(new(std::nothrow
) InstructionWithLabel(REL32
, label
));
191 CheckBool
AssemblyProgram::EmitRel32ARM(uint16 op
, Label
* label
,
192 const uint8
* arm_op
, uint16 op_size
) {
193 return Emit(new(std::nothrow
) InstructionWithLabelARM(REL32ARM
, op
, label
,
197 CheckBool
AssemblyProgram::EmitAbs32(Label
* label
) {
198 return Emit(new(std::nothrow
) InstructionWithLabel(ABS32
, label
));
201 CheckBool
AssemblyProgram::EmitAbs64(Label
* label
) {
202 return Emit(new (std::nothrow
) InstructionWithLabel(ABS64
, label
));
205 Label
* AssemblyProgram::FindOrMakeAbs32Label(RVA rva
) {
206 return FindLabel(rva
, &abs32_labels_
);
209 Label
* AssemblyProgram::FindOrMakeRel32Label(RVA rva
) {
210 return FindLabel(rva
, &rel32_labels_
);
213 void AssemblyProgram::DefaultAssignIndexes() {
214 DefaultAssignIndexes(&abs32_labels_
);
215 DefaultAssignIndexes(&rel32_labels_
);
218 void AssemblyProgram::UnassignIndexes() {
219 UnassignIndexes(&abs32_labels_
);
220 UnassignIndexes(&rel32_labels_
);
223 void AssemblyProgram::AssignRemainingIndexes() {
224 AssignRemainingIndexes(&abs32_labels_
);
225 AssignRemainingIndexes(&rel32_labels_
);
228 Label
* AssemblyProgram::InstructionAbs32Label(
229 const Instruction
* instruction
) const {
230 if (instruction
->op() == ABS32
)
231 return static_cast<const InstructionWithLabel
*>(instruction
)->label();
235 Label
* AssemblyProgram::InstructionAbs64Label(
236 const Instruction
* instruction
) const {
237 if (instruction
->op() == ABS64
)
238 return static_cast<const InstructionWithLabel
*>(instruction
)->label();
242 Label
* AssemblyProgram::InstructionRel32Label(
243 const Instruction
* instruction
) const {
244 if (instruction
->op() == REL32
|| instruction
->op() == REL32ARM
) {
246 static_cast<const InstructionWithLabel
*>(instruction
)->label();
252 CheckBool
AssemblyProgram::Emit(Instruction
* instruction
) {
255 bool ok
= instructions_
.push_back(instruction
);
261 Label
* AssemblyProgram::FindLabel(RVA rva
, RVAToLabel
* labels
) {
262 Label
*& slot
= (*labels
)[rva
];
264 slot
= new(std::nothrow
) Label(rva
);
272 void AssemblyProgram::UnassignIndexes(RVAToLabel
* labels
) {
273 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
274 Label
* current
= p
->second
;
275 current
->index_
= Label::kNoIndex
;
279 // DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
282 void AssemblyProgram::DefaultAssignIndexes(RVAToLabel
* labels
) {
284 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
285 Label
* current
= p
->second
;
286 if (current
->index_
!= Label::kNoIndex
)
288 current
->index_
= index
;
293 // AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
294 // yet assigned an index.
296 void AssemblyProgram::AssignRemainingIndexes(RVAToLabel
* labels
) {
297 // An address table compresses best when each index is associated with an
298 // address that is slight larger than the previous index.
300 // First see which indexes have not been used. The 'available' vector could
301 // grow even bigger, but the number of addresses is a better starting size
303 std::vector
<bool> available(labels
->size(), true);
306 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
307 int index
= p
->second
->index_
;
308 if (index
!= Label::kNoIndex
) {
309 while (static_cast<size_t>(index
) >= available
.size())
310 available
.push_back(true);
311 available
.at(index
) = false;
316 VLOG(1) << used
<< " of " << labels
->size() << " labels pre-assigned";
318 // Are there any unused labels that happen to be adjacent following a used
321 int fill_forward_count
= 0;
323 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
324 Label
* current
= p
->second
;
325 if (current
->index_
== Label::kNoIndex
) {
327 if (prev
&& prev
->index_
!= Label::kNoIndex
)
328 index
= prev
->index_
+ 1;
329 if (index
< static_cast<int>(available
.size()) && available
.at(index
)) {
330 current
->index_
= index
;
331 available
.at(index
) = false;
332 ++fill_forward_count
;
338 // Are there any unused labels that happen to be adjacent preceeding a used
341 int fill_backward_count
= 0;
343 for (RVAToLabel::reverse_iterator p
= labels
->rbegin();
346 Label
* current
= p
->second
;
347 if (current
->index_
== Label::kNoIndex
) {
350 prev_index
= prev
->index_
;
352 prev_index
= static_cast<uint32
>(available
.size());
353 if (prev_index
!= 0 &&
354 prev_index
!= Label::kNoIndex
&&
355 available
.at(prev_index
- 1)) {
356 current
->index_
= prev_index
- 1;
357 available
.at(current
->index_
) = false;
358 ++fill_backward_count
;
364 // Fill in any remaining indexes
365 int fill_infill_count
= 0;
367 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
368 Label
* current
= p
->second
;
369 if (current
->index_
== Label::kNoIndex
) {
370 while (!available
.at(index
)) {
373 current
->index_
= index
;
374 available
.at(index
) = false;
380 VLOG(1) << " fill forward " << fill_forward_count
381 << " backward " << fill_backward_count
382 << " infill " << fill_infill_count
;
385 typedef CheckBool (EncodedProgram::*DefineLabelMethod
)(int index
, RVA value
);
390 static CheckBool
DefineLabels(const RVAToLabel
& labels
,
391 EncodedProgram
* encoded_format
,
392 DefineLabelMethod define_label
) {
394 for (RVAToLabel::const_iterator p
= labels
.begin();
395 ok
&& p
!= labels
.end();
397 Label
* label
= p
->second
;
398 ok
= (encoded_format
->*define_label
)(label
->index_
, label
->rva_
);
403 EncodedProgram
* AssemblyProgram::Encode() const {
404 scoped_ptr
<EncodedProgram
> encoded(new(std::nothrow
) EncodedProgram());
408 encoded
->set_image_base(image_base_
);
410 if (!DefineLabels(abs32_labels_
, encoded
.get(),
411 &EncodedProgram::DefineAbs32Label
) ||
412 !DefineLabels(rel32_labels_
, encoded
.get(),
413 &EncodedProgram::DefineRel32Label
)) {
417 encoded
->EndLabels();
419 for (size_t i
= 0; i
< instructions_
.size(); ++i
) {
420 Instruction
* instruction
= instructions_
[i
];
422 switch (instruction
->op()) {
424 OriginInstruction
* org
= static_cast<OriginInstruction
*>(instruction
);
425 if (!encoded
->AddOrigin(org
->origin_rva()))
430 uint8 b
= static_cast<ByteInstruction
*>(instruction
)->byte_value();
431 if (!encoded
->AddCopy(1, &b
))
436 const uint8
* byte_values
=
437 static_cast<BytesInstruction
*>(instruction
)->byte_values();
438 size_t len
= static_cast<BytesInstruction
*>(instruction
)->len();
440 if (!encoded
->AddCopy(len
, byte_values
))
445 Label
* label
= static_cast<InstructionWithLabel
*>(instruction
)->label();
446 if (!encoded
->AddRel32(label
->index_
))
452 static_cast<InstructionWithLabelARM
*>(instruction
)->label();
453 uint16 compressed_op
=
454 static_cast<InstructionWithLabelARM
*>(instruction
)->
456 if (!encoded
->AddRel32ARM(compressed_op
, label
->index_
))
461 Label
* label
= static_cast<InstructionWithLabel
*>(instruction
)->label();
462 if (!encoded
->AddAbs32(label
->index_
))
467 Label
* label
= static_cast<InstructionWithLabel
*>(instruction
)->label();
468 if (!encoded
->AddAbs64(label
->index_
))
473 if (!encoded
->AddPeMakeRelocs(kind_
))
477 case MAKEELFRELOCS
: {
478 if (!encoded
->AddElfMakeRelocs())
482 case MAKEELFARMRELOCS
: {
483 if (!encoded
->AddElfARMMakeRelocs())
488 NOTREACHED() << "Unknown Insn OP kind";
493 return encoded
.release();
496 Instruction
* AssemblyProgram::GetByteInstruction(uint8 byte
) {
497 if (!byte_instruction_cache_
.get()) {
498 byte_instruction_cache_
.reset(new(std::nothrow
) Instruction
*[256]);
499 if (!byte_instruction_cache_
.get())
502 for (int i
= 0; i
< 256; ++i
) {
503 byte_instruction_cache_
[i
] =
504 new(std::nothrow
) ByteInstruction(static_cast<uint8
>(i
));
505 if (!byte_instruction_cache_
[i
]) {
506 for (int j
= 0; j
< i
; ++j
)
507 delete byte_instruction_cache_
[j
];
508 byte_instruction_cache_
.reset();
514 return byte_instruction_cache_
[byte
];
517 // Chosen empirically to give the best reduction in payload size for
518 // an update from daisy_3701.98.0 to daisy_4206.0.0.
519 const int AssemblyProgram::kLabelLowerLimit
= 5;
521 CheckBool
AssemblyProgram::TrimLabels() {
522 // For now only trim for ARM binaries
523 if (kind() != EXE_ELF_32_ARM
)
526 int lower_limit
= kLabelLowerLimit
;
528 VLOG(1) << "TrimLabels: threshold " << lower_limit
;
530 // Remove underused labels from the list of labels
531 RVAToLabel::iterator it
= rel32_labels_
.begin();
532 while (it
!= rel32_labels_
.end()) {
533 if (it
->second
->count_
<= lower_limit
) {
534 rel32_labels_
.erase(it
++);
540 // Walk through the list of instructions, replacing trimmed labels
541 // with the original machine instruction
542 for (size_t i
= 0; i
< instructions_
.size(); ++i
) {
543 Instruction
* instruction
= instructions_
[i
];
544 switch (instruction
->op()) {
547 static_cast<InstructionWithLabelARM
*>(instruction
)->label();
548 if (label
->count_
<= lower_limit
) {
549 const uint8
* arm_op
=
550 static_cast<InstructionWithLabelARM
*>(instruction
)->arm_op();
552 static_cast<InstructionWithLabelARM
*>(instruction
)->op_size();
556 BytesInstruction
* new_instruction
=
557 new(std::nothrow
) BytesInstruction(arm_op
, op_size
);
558 instructions_
[i
] = new_instruction
;
571 ////////////////////////////////////////////////////////////////////////////////
573 Status
TrimLabels(AssemblyProgram
* program
) {
574 if (program
->TrimLabels())
577 return C_TRIM_FAILED
;
580 Status
Encode(AssemblyProgram
* program
, EncodedProgram
** output
) {
582 EncodedProgram
*encoded
= program
->Encode();
587 return C_GENERAL_ERROR
;
591 } // namespace courgette