1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // The main idea in Courgette is to do patching *under a transformation*. The
6 // input is transformed into a new representation, patching occurs in the new
7 // representation, and then the transform is reversed to get the patched data.
9 // The idea is applied to pieces (or 'elements') of the whole (or 'ensemble').
10 // Each of the elements has to go through the same set of steps in lock-step.
12 // This file contains the code to create the patch.
15 #include "courgette/ensemble.h"
20 #include "base/basictypes.h"
21 #include "base/logging.h"
22 #include "base/time/time.h"
24 #include "courgette/crc.h"
25 #include "courgette/difference_estimator.h"
26 #include "courgette/region.h"
27 #include "courgette/simple_delta.h"
28 #include "courgette/streams.h"
29 #include "courgette/third_party/bsdiff.h"
31 #include "courgette/patcher_x86_32.h"
32 #include "courgette/patch_generator_x86_32.h"
36 TransformationPatchGenerator::TransformationPatchGenerator(
39 TransformationPatcher
* patcher
)
40 : old_element_(old_element
),
41 new_element_(new_element
),
45 TransformationPatchGenerator::~TransformationPatchGenerator() {
49 // The default implementation of PredictTransformParameters delegates to the
50 // patcher.
51 Status
TransformationPatchGenerator::PredictTransformParameters(
52 SinkStreamSet
* prediction
) {
53 return patcher_
->PredictTransformParameters(prediction
);
56 // The default implementation of Reform delegates to the patcher.
57 Status
TransformationPatchGenerator::Reform(
58 SourceStreamSet
* transformed_element
,
59 SinkStream
* reformed_element
) {
60 return patcher_
->Reform(transformed_element
, reformed_element
);
63 // Makes a TransformationPatchGenerator of the appropriate variety for the
64 // kind of |new_element|.
65 TransformationPatchGenerator
* MakeGenerator(Element
* old_element
,
66 Element
* new_element
) {
67 switch (new_element
->kind()) {
70 case EXE_WIN_32_X86
: {
71 TransformationPatchGenerator
* generator
=
72 new PatchGeneratorX86_32(
75 new PatcherX86_32(old_element
->region()),
79 case EXE_ELF_32_X86
: {
80 TransformationPatchGenerator
* generator
=
81 new PatchGeneratorX86_32(
84 new PatcherX86_32(old_element
->region()),
88 case EXE_ELF_32_ARM
: {
89 TransformationPatchGenerator
* generator
=
90 new PatchGeneratorX86_32(
93 new PatcherX86_32(old_element
->region()),
97 case EXE_WIN_32_X64
: {
98 TransformationPatchGenerator
* generator
=
99 new PatchGeneratorX86_32(
102 new PatcherX86_32(old_element
->region()),
108 LOG(WARNING
) << "Unexpected Element::Kind " << old_element
->kind();
112 // Checks to see if the proposed comparison is 'unsafe'. Sometimes one element
113 // from 'old' is matched as the closest element to multiple elements from 'new'.
114 // Each time this happens, the old element is transformed and serialized. This
115 // is a problem when the old element is huge compared with the new element
116 // because the multiple serialized copies can be much bigger than the size of
119 // The right way to avoid this is to ensure any one element from 'old' is
120 // serialized once, which requires matching code in the patch application.
122 // This is a quick hack to avoid the problem by prohibiting a big difference in
123 // size between matching elements.
124 bool UnsafeDifference(Element
* old_element
, Element
* new_element
) {
125 double kMaxBloat
= 2.0;
126 size_t kMinWorrysomeDifference
= 2 << 20; // 2MB
127 size_t old_size
= old_element
->region().length();
128 size_t new_size
= new_element
->region().length();
129 size_t low_size
= std::min(old_size
, new_size
);
130 size_t high_size
= std::max(old_size
, new_size
);
131 if (high_size
- low_size
< kMinWorrysomeDifference
) return false;
132 if (high_size
< low_size
* kMaxBloat
) return false;
136 // FindGenerators finds TransformationPatchGenerators for the elements of
137 // |new_ensemble|. For each element of |new_ensemble| we find the closest
138 // matching element from |old_ensemble| and use that as the basis for
139 // differential compression. The elements have to be the same kind so as to
140 // support transformation into the same kind of 'new representation'.
142 Status
FindGenerators(Ensemble
* old_ensemble
, Ensemble
* new_ensemble
,
143 std::vector
<TransformationPatchGenerator
*>* generators
) {
144 base::Time start_find_time
= base::Time::Now();
145 old_ensemble
->FindEmbeddedElements();
146 new_ensemble
->FindEmbeddedElements();
147 VLOG(1) << "done FindEmbeddedElements "
148 << (base::Time::Now() - start_find_time
).InSecondsF();
150 std::vector
<Element
*> old_elements(old_ensemble
->elements());
151 std::vector
<Element
*> new_elements(new_ensemble
->elements());
153 VLOG(1) << "old has " << old_elements
.size() << " elements";
154 VLOG(1) << "new has " << new_elements
.size() << " elements";
156 DifferenceEstimator difference_estimator
;
157 std::vector
<DifferenceEstimator::Base
*> bases
;
159 base::Time start_bases_time
= base::Time::Now();
160 for (size_t i
= 0; i
< old_elements
.size(); ++i
) {
162 difference_estimator
.MakeBase(old_elements
[i
]->region()));
164 VLOG(1) << "done make bases "
165 << (base::Time::Now() - start_bases_time
).InSecondsF() << "s";
167 for (size_t new_index
= 0; new_index
< new_elements
.size(); ++new_index
) {
168 Element
* new_element
= new_elements
[new_index
];
169 DifferenceEstimator::Subject
* new_subject
=
170 difference_estimator
.MakeSubject(new_element
->region());
172 // Search through old elements to find the best match.
174 // TODO(sra): This is O(N x M), i.e. O(N^2) since old_ensemble and
175 // new_ensemble probably have a very similar structure. We can make the
176 // search faster by making the comparison provided by DifferenceEstimator
177 // more nuanced, returning early if the measured difference is greater than
178 // the current best. This will be most effective if we can arrange that the
179 // first elements we try to match are likely the 'right' ones. We could
180 // prioritize elements that are of a similar size or similar position in the
181 // sequence of elements.
183 Element
* best_old_element
= NULL
;
184 size_t best_difference
= std::numeric_limits
<size_t>::max();
185 for (size_t old_index
= 0; old_index
< old_elements
.size(); ++old_index
) {
186 Element
* old_element
= old_elements
[old_index
];
187 // Elements of different kinds are incompatible.
188 if (old_element
->kind() != new_element
->kind())
191 if (UnsafeDifference(old_element
, new_element
))
194 base::Time start_compare
= base::Time::Now();
195 DifferenceEstimator::Base
* old_base
= bases
[old_index
];
196 size_t difference
= difference_estimator
.Measure(old_base
, new_subject
);
198 VLOG(1) << "Compare " << old_element
->Name()
199 << " to " << new_element
->Name()
200 << " --> " << difference
201 << " in " << (base::Time::Now() - start_compare
).InSecondsF()
203 if (difference
== 0) {
204 VLOG(1) << "Skip " << new_element
->Name()
205 << " - identical to " << old_element
->Name();
207 best_old_element
= NULL
;
210 if (difference
< best_difference
) {
211 best_difference
= difference
;
212 best_old_element
= old_element
;
216 if (best_old_element
) {
217 VLOG(1) << "Matched " << best_old_element
->Name()
218 << " to " << new_element
->Name()
219 << " --> " << best_difference
;
220 TransformationPatchGenerator
* generator
=
221 MakeGenerator(best_old_element
, new_element
);
223 generators
->push_back(generator
);
227 VLOG(1) << "done FindGenerators found " << generators
->size()
228 << " in " << (base::Time::Now() - start_find_time
).InSecondsF()
234 void FreeGenerators(std::vector
<TransformationPatchGenerator
*>* generators
) {
235 for (size_t i
= 0; i
< generators
->size(); ++i
) {
236 delete (*generators
)[i
];
241 ////////////////////////////////////////////////////////////////////////////////
243 Status
GenerateEnsemblePatch(SourceStream
* base
,
244 SourceStream
* update
,
245 SinkStream
* final_patch
) {
246 VLOG(1) << "start GenerateEnsemblePatch";
247 base::Time start_time
= base::Time::Now();
249 Region
old_region(base
->Buffer(), base
->Remaining());
250 Region
new_region(update
->Buffer(), update
->Remaining());
251 Ensemble
old_ensemble(old_region
, "old");
252 Ensemble
new_ensemble(new_region
, "new");
253 std::vector
<TransformationPatchGenerator
*> generators
;
254 Status generators_status
= FindGenerators(&old_ensemble
, &new_ensemble
,
256 if (generators_status
!= C_OK
)
257 return generators_status
;
259 SinkStreamSet patch_streams
;
261 SinkStream
* tranformation_descriptions
= patch_streams
.stream(0);
262 SinkStream
* parameter_correction
= patch_streams
.stream(1);
263 SinkStream
* transformed_elements_correction
= patch_streams
.stream(2);
264 SinkStream
* ensemble_correction
= patch_streams
.stream(3);
266 size_t number_of_transformations
= generators
.size();
267 if (!tranformation_descriptions
->WriteSizeVarint32(number_of_transformations
))
268 return C_STREAM_ERROR
;
270 for (size_t i
= 0; i
< number_of_transformations
; ++i
) {
271 ExecutableType kind
= generators
[i
]->Kind();
272 if (!tranformation_descriptions
->WriteVarint32(kind
))
273 return C_STREAM_ERROR
;
276 for (size_t i
= 0; i
< number_of_transformations
; ++i
) {
278 generators
[i
]->WriteInitialParameters(tranformation_descriptions
);
284 // Generate sub-patch for parameters.
286 SinkStreamSet predicted_parameters_sink
;
287 SinkStreamSet corrected_parameters_sink
;
289 for (size_t i
= 0; i
< number_of_transformations
; ++i
) {
290 SinkStreamSet single_predicted_parameters
;
292 status
= generators
[i
]->PredictTransformParameters(
293 &single_predicted_parameters
);
296 if (!predicted_parameters_sink
.WriteSet(&single_predicted_parameters
))
297 return C_STREAM_ERROR
;
299 SinkStreamSet single_corrected_parameters
;
300 status
= generators
[i
]->CorrectedTransformParameters(
301 &single_corrected_parameters
);
304 if (!corrected_parameters_sink
.WriteSet(&single_corrected_parameters
))
305 return C_STREAM_ERROR
;
308 SinkStream linearized_predicted_parameters
;
309 SinkStream linearized_corrected_parameters
;
311 if (!predicted_parameters_sink
.CopyTo(&linearized_predicted_parameters
))
312 return C_STREAM_ERROR
;
313 if (!corrected_parameters_sink
.CopyTo(&linearized_corrected_parameters
))
314 return C_STREAM_ERROR
;
316 SourceStream predicted_parameters_source
;
317 SourceStream corrected_parameters_source
;
318 predicted_parameters_source
.Init(linearized_predicted_parameters
);
319 corrected_parameters_source
.Init(linearized_corrected_parameters
);
321 Status delta1_status
= GenerateSimpleDelta(&predicted_parameters_source
,
322 &corrected_parameters_source
,
323 parameter_correction
);
324 if (delta1_status
!= C_OK
)
325 return delta1_status
;
328 // Generate sub-patch for elements.
330 corrected_parameters_source
.Init(linearized_corrected_parameters
);
331 SourceStreamSet corrected_parameters_source_set
;
332 if (!corrected_parameters_source_set
.Init(&corrected_parameters_source
))
333 return C_STREAM_ERROR
;
335 SinkStreamSet predicted_transformed_elements
;
336 SinkStreamSet corrected_transformed_elements
;
338 for (size_t i
= 0; i
< number_of_transformations
; ++i
) {
339 SourceStreamSet single_parameters
;
340 if (!corrected_parameters_source_set
.ReadSet(&single_parameters
))
341 return C_STREAM_ERROR
;
342 SinkStreamSet single_predicted_transformed_element
;
343 SinkStreamSet single_corrected_transformed_element
;
344 Status status
= generators
[i
]->Transform(
346 &single_predicted_transformed_element
,
347 &single_corrected_transformed_element
);
350 if (!single_parameters
.Empty())
351 return C_STREAM_NOT_CONSUMED
;
352 if (!predicted_transformed_elements
.WriteSet(
353 &single_predicted_transformed_element
))
354 return C_STREAM_ERROR
;
355 if (!corrected_transformed_elements
.WriteSet(
356 &single_corrected_transformed_element
))
357 return C_STREAM_ERROR
;
360 if (!corrected_parameters_source_set
.Empty())
361 return C_STREAM_NOT_CONSUMED
;
363 SinkStream linearized_predicted_transformed_elements
;
364 SinkStream linearized_corrected_transformed_elements
;
366 if (!predicted_transformed_elements
.CopyTo(
367 &linearized_predicted_transformed_elements
))
368 return C_STREAM_ERROR
;
369 if (!corrected_transformed_elements
.CopyTo(
370 &linearized_corrected_transformed_elements
))
371 return C_STREAM_ERROR
;
373 SourceStream predicted_transformed_elements_source
;
374 SourceStream corrected_transformed_elements_source
;
375 predicted_transformed_elements_source
376 .Init(linearized_predicted_transformed_elements
);
377 corrected_transformed_elements_source
378 .Init(linearized_corrected_transformed_elements
);
380 Status delta2_status
=
381 GenerateSimpleDelta(&predicted_transformed_elements_source
,
382 &corrected_transformed_elements_source
,
383 transformed_elements_correction
);
384 if (delta2_status
!= C_OK
)
385 return delta2_status
;
387 // Last use, free storage.
388 linearized_predicted_transformed_elements
.Retire();
391 // Generate sub-patch for whole enchilada.
393 SinkStream predicted_ensemble
;
395 if (!predicted_ensemble
.Write(base
->Buffer(), base
->Remaining()))
396 return C_STREAM_ERROR
;
398 SourceStreamSet corrected_transformed_elements_source_set
;
399 corrected_transformed_elements_source
400 .Init(linearized_corrected_transformed_elements
);
401 if (!corrected_transformed_elements_source_set
402 .Init(&corrected_transformed_elements_source
))
403 return C_STREAM_ERROR
;
405 for (size_t i
= 0; i
< number_of_transformations
; ++i
) {
406 SourceStreamSet single_corrected_transformed_element
;
407 if (!corrected_transformed_elements_source_set
.ReadSet(
408 &single_corrected_transformed_element
))
409 return C_STREAM_ERROR
;
410 Status status
= generators
[i
]->Reform(&single_corrected_transformed_element
,
411 &predicted_ensemble
);
414 if (!single_corrected_transformed_element
.Empty())
415 return C_STREAM_NOT_CONSUMED
;
418 if (!corrected_transformed_elements_source_set
.Empty())
419 return C_STREAM_NOT_CONSUMED
;
421 // No more references to this stream's buffer.
422 linearized_corrected_transformed_elements
.Retire();
424 FreeGenerators(&generators
);
426 size_t final_patch_input_size
= predicted_ensemble
.Length();
427 SourceStream predicted_ensemble_source
;
428 predicted_ensemble_source
.Init(predicted_ensemble
);
429 Status delta3_status
= GenerateSimpleDelta(&predicted_ensemble_source
,
431 ensemble_correction
);
432 if (delta3_status
!= C_OK
)
433 return delta3_status
;
436 // Final output stream has a header followed by a StreamSet.
438 if (!final_patch
->WriteVarint32(CourgettePatchFile::kMagic
) ||
439 !final_patch
->WriteVarint32(CourgettePatchFile::kVersion
) ||
440 !final_patch
->WriteVarint32(CalculateCrc(old_region
.start(),
441 old_region
.length())) ||
442 !final_patch
->WriteVarint32(CalculateCrc(new_region
.start(),
443 new_region
.length())) ||
444 !final_patch
->WriteSizeVarint32(final_patch_input_size
) ||
445 !patch_streams
.CopyTo(final_patch
)) {
446 return C_STREAM_ERROR
;
449 VLOG(1) << "done GenerateEnsemblePatch "
450 << (base::Time::Now() - start_time
).InSecondsF() << "s";