blob: b579e56aed4bf38fdac93436b97ae0e73cda9789 [file] [log] [blame]
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef _APPLYPATCH_IMGDIFF_IMAGE_H
18#define _APPLYPATCH_IMGDIFF_IMAGE_H
19
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>

#include <string>
#include <vector>

#include <bsdiff/bsdiff.h>
#include <ziparchive/zip_archive.h>
#include <zlib.h>

#include "imgdiff.h"
#include "otautil/rangeset.h"
Tianjie Xu57dd9612017-08-17 17:50:56 -070033
34class ImageChunk {
35 public:
36 static constexpr auto WINDOWBITS = -15; // 32kb window; negative to indicate a raw stream.
37 static constexpr auto MEMLEVEL = 8; // the default value.
38 static constexpr auto METHOD = Z_DEFLATED;
39 static constexpr auto STRATEGY = Z_DEFAULT_STRATEGY;
40
41 ImageChunk(int type, size_t start, const std::vector<uint8_t>* file_content, size_t raw_data_len,
42 std::string entry_name = {});
43
44 int GetType() const {
45 return type_;
46 }
Tianjie Xucc61cf62018-05-23 22:23:31 -070047
48 const uint8_t* GetRawData() const;
Tianjie Xu57dd9612017-08-17 17:50:56 -070049 size_t GetRawDataLength() const {
50 return raw_data_len_;
51 }
52 const std::string& GetEntryName() const {
53 return entry_name_;
54 }
55 size_t GetStartOffset() const {
56 return start_;
57 }
58 int GetCompressLevel() const {
59 return compress_level_;
60 }
61
62 // CHUNK_DEFLATE will return the uncompressed data for diff, while other types will simply return
63 // the raw data.
64 const uint8_t* DataForPatch() const;
65 size_t DataLengthForPatch() const;
66
Tianjie Xu6e293c92017-11-15 16:26:41 -080067 void Dump(size_t index) const;
Tianjie Xu57dd9612017-08-17 17:50:56 -070068
69 void SetUncompressedData(std::vector<uint8_t> data);
70 bool SetBonusData(const std::vector<uint8_t>& bonus_data);
71
72 bool operator==(const ImageChunk& other) const;
73 bool operator!=(const ImageChunk& other) const {
74 return !(*this == other);
75 }
76
77 /*
78 * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob of uninterpreted data).
79 * The resulting patch will likely be about as big as the target file, but it lets us handle
80 * the case of images where some gzip chunks are reconstructible but others aren't (by treating
81 * the ones that aren't as normal chunks).
82 */
83 void ChangeDeflateChunkToNormal();
84
85 /*
86 * Verify that we can reproduce exactly the same compressed data that we started with. Sets the
87 * level, method, windowBits, memLevel, and strategy fields in the chunk to the encoding
88 * parameters needed to produce the right output.
89 */
90 bool ReconstructDeflateChunk();
91 bool IsAdjacentNormal(const ImageChunk& other) const;
92 void MergeAdjacentNormal(const ImageChunk& other);
93
94 /*
95 * Compute a bsdiff patch between |src| and |tgt|; Store the result in the patch_data.
96 * |bsdiff_cache| can be used to cache the suffix array if the same |src| chunk is used
97 * repeatedly, pass nullptr if not needed.
98 */
99 static bool MakePatch(const ImageChunk& tgt, const ImageChunk& src,
Alex Deymofa188262017-10-10 17:56:17 +0200100 std::vector<uint8_t>* patch_data,
101 bsdiff::SuffixArrayIndexInterface** bsdiff_cache);
Tianjie Xu57dd9612017-08-17 17:50:56 -0700102
103 private:
Tianjie Xu57dd9612017-08-17 17:50:56 -0700104 bool TryReconstruction(int level);
105
106 int type_; // CHUNK_NORMAL, CHUNK_DEFLATE, CHUNK_RAW
107 size_t start_; // offset of chunk in the original input file
108 const std::vector<uint8_t>* input_file_ptr_; // ptr to the full content of original input file
109 size_t raw_data_len_;
110
111 // deflate encoder parameters
112 int compress_level_;
113
114 // --- for CHUNK_DEFLATE chunks only: ---
115 std::vector<uint8_t> uncompressed_data_;
116 std::string entry_name_; // used for zip entries
117};
118
119// PatchChunk stores the patch data between a source chunk and a target chunk. It also keeps track
120// of the metadata of src&tgt chunks (e.g. offset, raw data length, uncompressed data length).
121class PatchChunk {
122 public:
123 PatchChunk(const ImageChunk& tgt, const ImageChunk& src, std::vector<uint8_t> data);
124
125 // Construct a CHUNK_RAW patch from the target data directly.
126 explicit PatchChunk(const ImageChunk& tgt);
127
128 // Return true if raw data size is smaller than the patch size.
129 static bool RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size);
130
Tianjie Xu2903cdd2017-08-18 18:15:47 -0700131 // Update the source start with the new offset within the source range.
132 void UpdateSourceOffset(const SortedRangeSet& src_range);
133
Tianjie Xu82582b42017-08-31 18:05:19 -0700134 // Return the total size (header + data) of the patch.
135 size_t PatchSize() const;
136
Tianjie Xu57dd9612017-08-17 17:50:56 -0700137 static bool WritePatchDataToFd(const std::vector<PatchChunk>& patch_chunks, int patch_fd);
138
139 private:
140 size_t GetHeaderSize() const;
Tianjie Xu6e293c92017-11-15 16:26:41 -0800141 size_t WriteHeaderToFd(int fd, size_t offset, size_t index) const;
Tianjie Xu57dd9612017-08-17 17:50:56 -0700142
143 // The patch chunk type is the same as the target chunk type. The only exception is we change
144 // the |type_| to CHUNK_RAW if target length is smaller than the patch size.
145 int type_;
146
147 size_t source_start_;
148 size_t source_len_;
149 size_t source_uncompressed_len_;
150
151 size_t target_start_; // offset of the target chunk within the target file
152 size_t target_len_;
153 size_t target_uncompressed_len_;
154 size_t target_compress_level_; // the deflate compression level of the target chunk.
155
156 std::vector<uint8_t> data_; // storage for the patch data
157};
158
159// Interface for zip_mode and image_mode images. We initialize the image from an input file and
160// split the file content into a list of image chunks.
161class Image {
162 public:
163 explicit Image(bool is_source) : is_source_(is_source) {}
164
165 virtual ~Image() {}
166
167 // Create a list of image chunks from input file.
168 virtual bool Initialize(const std::string& filename) = 0;
169
170 // Look for runs of adjacent normal chunks and compress them down into a single chunk. (Such
171 // runs can be produced when deflate chunks are changed to normal chunks.)
172 void MergeAdjacentNormalChunks();
173
174 void DumpChunks() const;
175
176 // Non const iterators to access the stored ImageChunks.
177 std::vector<ImageChunk>::iterator begin() {
178 return chunks_.begin();
179 }
180
181 std::vector<ImageChunk>::iterator end() {
182 return chunks_.end();
183 }
184
Tianjie Xu2903cdd2017-08-18 18:15:47 -0700185 std::vector<ImageChunk>::const_iterator cbegin() const {
186 return chunks_.cbegin();
187 }
188
189 std::vector<ImageChunk>::const_iterator cend() const {
190 return chunks_.cend();
191 }
192
Tianjie Xu57dd9612017-08-17 17:50:56 -0700193 ImageChunk& operator[](size_t i);
194 const ImageChunk& operator[](size_t i) const;
195
196 size_t NumOfChunks() const {
197 return chunks_.size();
198 }
199
200 protected:
201 bool ReadFile(const std::string& filename, std::vector<uint8_t>* file_content);
202
203 bool is_source_; // True if it's for source chunks.
204 std::vector<ImageChunk> chunks_; // Internal storage of ImageChunk.
205 std::vector<uint8_t> file_content_; // Store the whole input file in memory.
206};
207
208class ZipModeImage : public Image {
209 public:
Tianjie Xu2903cdd2017-08-18 18:15:47 -0700210 explicit ZipModeImage(bool is_source, size_t limit = 0) : Image(is_source), limit_(limit) {}
Tianjie Xu57dd9612017-08-17 17:50:56 -0700211
212 bool Initialize(const std::string& filename) override;
213
Tianjie78d15142020-07-22 17:40:09 -0700214 // Initialize a fake ZipModeImage from an existing ImageChunk vector. For src img pieces, we
Tianjie Xu2903cdd2017-08-18 18:15:47 -0700215 // reconstruct a new file_content based on the source ranges; but it's not needed for the tgt img
216 // pieces; because for each chunk both the data and their offset within the file are unchanged.
217 void Initialize(const std::vector<ImageChunk>& chunks, const std::vector<uint8_t>& file_content) {
218 chunks_ = chunks;
219 file_content_ = file_content;
220 }
221
Tianjie Xu57dd9612017-08-17 17:50:56 -0700222 // The pesudo source chunk for bsdiff if there's no match for the given target chunk. It's in
223 // fact the whole source file.
224 ImageChunk PseudoSource() const;
225
226 // Find the matching deflate source chunk by entry name. Search for normal chunks also if
227 // |find_normal| is true.
228 ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false);
229
230 const ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false) const;
231
232 // Verify that we can reconstruct the deflate chunks; also change the type to CHUNK_NORMAL if
233 // src and tgt are identical.
234 static bool CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* src_image);
235
236 // Compute the patch between tgt & src images, and write the data into |patch_name|.
237 static bool GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image,
238 const std::string& patch_name);
239
Tianjie Xu2903cdd2017-08-18 18:15:47 -0700240 // Compute the patch based on the lists of split src and tgt images. Generate patches for each
241 // pair of split pieces and write the data to |patch_name|. If |debug_dir| is specified, write
242 // each split src data and patch data into that directory.
243 static bool GeneratePatches(const std::vector<ZipModeImage>& split_tgt_images,
244 const std::vector<ZipModeImage>& split_src_images,
245 const std::vector<SortedRangeSet>& split_src_ranges,
Tianjie Xu82582b42017-08-31 18:05:19 -0700246 const std::string& patch_name, const std::string& split_info_file,
247 const std::string& debug_dir);
Tianjie Xu2903cdd2017-08-18 18:15:47 -0700248
249 // Split the tgt chunks and src chunks based on the size limit.
250 static bool SplitZipModeImageWithLimit(const ZipModeImage& tgt_image,
251 const ZipModeImage& src_image,
252 std::vector<ZipModeImage>* split_tgt_images,
253 std::vector<ZipModeImage>* split_src_images,
254 std::vector<SortedRangeSet>* split_src_ranges);
255
Tianjie Xu57dd9612017-08-17 17:50:56 -0700256 private:
257 // Initialize image chunks based on the zip entries.
258 bool InitializeChunks(const std::string& filename, ZipArchiveHandle handle);
259 // Add the a zip entry to the list.
Kelvin Zhang4f811302020-09-16 14:06:12 -0400260 bool AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name,
261 ZipEntry64* entry);
Tianjie Xu57dd9612017-08-17 17:50:56 -0700262 // Return the real size of the zip file. (omit the trailing zeros that used for alignment)
263 bool GetZipFileSize(size_t* input_file_size);
Tianjie Xu2903cdd2017-08-18 18:15:47 -0700264
265 static void ValidateSplitImages(const std::vector<ZipModeImage>& split_tgt_images,
266 const std::vector<ZipModeImage>& split_src_images,
267 std::vector<SortedRangeSet>& split_src_ranges,
268 size_t total_tgt_size);
Tianjie78d15142020-07-22 17:40:09 -0700269 // Construct the fake split images based on the chunks info and source ranges; and move them into
Tianjie Xu572abbb2018-02-22 15:40:39 -0800270 // the given vectors. Return true if we add a new split image into |split_tgt_images|, and
271 // false otherwise.
272 static bool AddSplitImageFromChunkList(const ZipModeImage& tgt_image,
Tianjie Xu2903cdd2017-08-18 18:15:47 -0700273 const ZipModeImage& src_image,
274 const SortedRangeSet& split_src_ranges,
275 const std::vector<ImageChunk>& split_tgt_chunks,
276 const std::vector<ImageChunk>& split_src_chunks,
277 std::vector<ZipModeImage>* split_tgt_images,
278 std::vector<ZipModeImage>* split_src_images);
279
280 // Function that actually iterates the tgt_chunks and makes patches.
281 static bool GeneratePatchesInternal(const ZipModeImage& tgt_image, const ZipModeImage& src_image,
282 std::vector<PatchChunk>* patch_chunks);
283
284 // size limit in bytes of each chunk. Also, if the length of one zip_entry exceeds the limit,
285 // we'll split that entry into several smaller chunks in advance.
286 size_t limit_;
Tianjie Xu57dd9612017-08-17 17:50:56 -0700287};
288
289class ImageModeImage : public Image {
290 public:
291 explicit ImageModeImage(bool is_source) : Image(is_source) {}
292
293 // Initialize the image chunks list by searching the magic numbers in an image file.
294 bool Initialize(const std::string& filename) override;
295
296 bool SetBonusData(const std::vector<uint8_t>& bonus_data);
297
298 // In Image Mode, verify that the source and target images have the same chunk structure (ie, the
299 // same sequence of deflate and normal chunks).
300 static bool CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeImage* src_image);
301
302 // In image mode, generate patches against the given source chunks and bonus_data; write the
303 // result to |patch_name|.
304 static bool GeneratePatches(const ImageModeImage& tgt_image, const ImageModeImage& src_image,
305 const std::string& patch_name);
306};
307
308#endif // _APPLYPATCH_IMGDIFF_IMAGE_H