/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | |
| 17 | #ifndef _APPLYPATCH_IMGDIFF_IMAGE_H |
| 18 | #define _APPLYPATCH_IMGDIFF_IMAGE_H |
| 19 | |
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>

#include <string>
#include <vector>

#include <bsdiff/bsdiff.h>
#include <ziparchive/zip_archive.h>
#include <zlib.h>

#include "imgdiff.h"
#include "otautil/rangeset.h"
Tianjie Xu | 57dd961 | 2017-08-17 17:50:56 -0700 | [diff] [blame] | 33 | |
| 34 | class ImageChunk { |
| 35 | public: |
| 36 | static constexpr auto WINDOWBITS = -15; // 32kb window; negative to indicate a raw stream. |
| 37 | static constexpr auto MEMLEVEL = 8; // the default value. |
| 38 | static constexpr auto METHOD = Z_DEFLATED; |
| 39 | static constexpr auto STRATEGY = Z_DEFAULT_STRATEGY; |
| 40 | |
| 41 | ImageChunk(int type, size_t start, const std::vector<uint8_t>* file_content, size_t raw_data_len, |
| 42 | std::string entry_name = {}); |
| 43 | |
| 44 | int GetType() const { |
| 45 | return type_; |
| 46 | } |
| 47 | size_t GetRawDataLength() const { |
| 48 | return raw_data_len_; |
| 49 | } |
| 50 | const std::string& GetEntryName() const { |
| 51 | return entry_name_; |
| 52 | } |
| 53 | size_t GetStartOffset() const { |
| 54 | return start_; |
| 55 | } |
| 56 | int GetCompressLevel() const { |
| 57 | return compress_level_; |
| 58 | } |
| 59 | |
| 60 | // CHUNK_DEFLATE will return the uncompressed data for diff, while other types will simply return |
| 61 | // the raw data. |
| 62 | const uint8_t* DataForPatch() const; |
| 63 | size_t DataLengthForPatch() const; |
| 64 | |
Tianjie Xu | 6e293c9 | 2017-11-15 16:26:41 -0800 | [diff] [blame] | 65 | void Dump(size_t index) const; |
Tianjie Xu | 57dd961 | 2017-08-17 17:50:56 -0700 | [diff] [blame] | 66 | |
| 67 | void SetUncompressedData(std::vector<uint8_t> data); |
| 68 | bool SetBonusData(const std::vector<uint8_t>& bonus_data); |
| 69 | |
| 70 | bool operator==(const ImageChunk& other) const; |
| 71 | bool operator!=(const ImageChunk& other) const { |
| 72 | return !(*this == other); |
| 73 | } |
| 74 | |
| 75 | /* |
| 76 | * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob of uninterpreted data). |
| 77 | * The resulting patch will likely be about as big as the target file, but it lets us handle |
| 78 | * the case of images where some gzip chunks are reconstructible but others aren't (by treating |
| 79 | * the ones that aren't as normal chunks). |
| 80 | */ |
| 81 | void ChangeDeflateChunkToNormal(); |
| 82 | |
| 83 | /* |
| 84 | * Verify that we can reproduce exactly the same compressed data that we started with. Sets the |
| 85 | * level, method, windowBits, memLevel, and strategy fields in the chunk to the encoding |
| 86 | * parameters needed to produce the right output. |
| 87 | */ |
| 88 | bool ReconstructDeflateChunk(); |
| 89 | bool IsAdjacentNormal(const ImageChunk& other) const; |
| 90 | void MergeAdjacentNormal(const ImageChunk& other); |
| 91 | |
| 92 | /* |
| 93 | * Compute a bsdiff patch between |src| and |tgt|; Store the result in the patch_data. |
| 94 | * |bsdiff_cache| can be used to cache the suffix array if the same |src| chunk is used |
| 95 | * repeatedly, pass nullptr if not needed. |
| 96 | */ |
| 97 | static bool MakePatch(const ImageChunk& tgt, const ImageChunk& src, |
Alex Deymo | fa18826 | 2017-10-10 17:56:17 +0200 | [diff] [blame] | 98 | std::vector<uint8_t>* patch_data, |
| 99 | bsdiff::SuffixArrayIndexInterface** bsdiff_cache); |
Tianjie Xu | 57dd961 | 2017-08-17 17:50:56 -0700 | [diff] [blame] | 100 | |
| 101 | private: |
| 102 | const uint8_t* GetRawData() const; |
| 103 | bool TryReconstruction(int level); |
| 104 | |
| 105 | int type_; // CHUNK_NORMAL, CHUNK_DEFLATE, CHUNK_RAW |
| 106 | size_t start_; // offset of chunk in the original input file |
| 107 | const std::vector<uint8_t>* input_file_ptr_; // ptr to the full content of original input file |
| 108 | size_t raw_data_len_; |
| 109 | |
| 110 | // deflate encoder parameters |
| 111 | int compress_level_; |
| 112 | |
| 113 | // --- for CHUNK_DEFLATE chunks only: --- |
| 114 | std::vector<uint8_t> uncompressed_data_; |
| 115 | std::string entry_name_; // used for zip entries |
| 116 | }; |
| 117 | |
| 118 | // PatchChunk stores the patch data between a source chunk and a target chunk. It also keeps track |
| 119 | // of the metadata of src&tgt chunks (e.g. offset, raw data length, uncompressed data length). |
| 120 | class PatchChunk { |
| 121 | public: |
| 122 | PatchChunk(const ImageChunk& tgt, const ImageChunk& src, std::vector<uint8_t> data); |
| 123 | |
| 124 | // Construct a CHUNK_RAW patch from the target data directly. |
| 125 | explicit PatchChunk(const ImageChunk& tgt); |
| 126 | |
| 127 | // Return true if raw data size is smaller than the patch size. |
| 128 | static bool RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size); |
| 129 | |
Tianjie Xu | 2903cdd | 2017-08-18 18:15:47 -0700 | [diff] [blame] | 130 | // Update the source start with the new offset within the source range. |
| 131 | void UpdateSourceOffset(const SortedRangeSet& src_range); |
| 132 | |
Tianjie Xu | 82582b4 | 2017-08-31 18:05:19 -0700 | [diff] [blame] | 133 | // Return the total size (header + data) of the patch. |
| 134 | size_t PatchSize() const; |
| 135 | |
Tianjie Xu | 57dd961 | 2017-08-17 17:50:56 -0700 | [diff] [blame] | 136 | static bool WritePatchDataToFd(const std::vector<PatchChunk>& patch_chunks, int patch_fd); |
| 137 | |
| 138 | private: |
| 139 | size_t GetHeaderSize() const; |
Tianjie Xu | 6e293c9 | 2017-11-15 16:26:41 -0800 | [diff] [blame] | 140 | size_t WriteHeaderToFd(int fd, size_t offset, size_t index) const; |
Tianjie Xu | 57dd961 | 2017-08-17 17:50:56 -0700 | [diff] [blame] | 141 | |
| 142 | // The patch chunk type is the same as the target chunk type. The only exception is we change |
| 143 | // the |type_| to CHUNK_RAW if target length is smaller than the patch size. |
| 144 | int type_; |
| 145 | |
| 146 | size_t source_start_; |
| 147 | size_t source_len_; |
| 148 | size_t source_uncompressed_len_; |
| 149 | |
| 150 | size_t target_start_; // offset of the target chunk within the target file |
| 151 | size_t target_len_; |
| 152 | size_t target_uncompressed_len_; |
| 153 | size_t target_compress_level_; // the deflate compression level of the target chunk. |
| 154 | |
| 155 | std::vector<uint8_t> data_; // storage for the patch data |
| 156 | }; |
| 157 | |
| 158 | // Interface for zip_mode and image_mode images. We initialize the image from an input file and |
| 159 | // split the file content into a list of image chunks. |
| 160 | class Image { |
| 161 | public: |
| 162 | explicit Image(bool is_source) : is_source_(is_source) {} |
| 163 | |
| 164 | virtual ~Image() {} |
| 165 | |
| 166 | // Create a list of image chunks from input file. |
| 167 | virtual bool Initialize(const std::string& filename) = 0; |
| 168 | |
| 169 | // Look for runs of adjacent normal chunks and compress them down into a single chunk. (Such |
| 170 | // runs can be produced when deflate chunks are changed to normal chunks.) |
| 171 | void MergeAdjacentNormalChunks(); |
| 172 | |
| 173 | void DumpChunks() const; |
| 174 | |
| 175 | // Non const iterators to access the stored ImageChunks. |
| 176 | std::vector<ImageChunk>::iterator begin() { |
| 177 | return chunks_.begin(); |
| 178 | } |
| 179 | |
| 180 | std::vector<ImageChunk>::iterator end() { |
| 181 | return chunks_.end(); |
| 182 | } |
| 183 | |
Tianjie Xu | 2903cdd | 2017-08-18 18:15:47 -0700 | [diff] [blame] | 184 | std::vector<ImageChunk>::const_iterator cbegin() const { |
| 185 | return chunks_.cbegin(); |
| 186 | } |
| 187 | |
| 188 | std::vector<ImageChunk>::const_iterator cend() const { |
| 189 | return chunks_.cend(); |
| 190 | } |
| 191 | |
Tianjie Xu | 57dd961 | 2017-08-17 17:50:56 -0700 | [diff] [blame] | 192 | ImageChunk& operator[](size_t i); |
| 193 | const ImageChunk& operator[](size_t i) const; |
| 194 | |
| 195 | size_t NumOfChunks() const { |
| 196 | return chunks_.size(); |
| 197 | } |
| 198 | |
| 199 | protected: |
| 200 | bool ReadFile(const std::string& filename, std::vector<uint8_t>* file_content); |
| 201 | |
| 202 | bool is_source_; // True if it's for source chunks. |
| 203 | std::vector<ImageChunk> chunks_; // Internal storage of ImageChunk. |
| 204 | std::vector<uint8_t> file_content_; // Store the whole input file in memory. |
| 205 | }; |
| 206 | |
| 207 | class ZipModeImage : public Image { |
| 208 | public: |
Tianjie Xu | 2903cdd | 2017-08-18 18:15:47 -0700 | [diff] [blame] | 209 | explicit ZipModeImage(bool is_source, size_t limit = 0) : Image(is_source), limit_(limit) {} |
Tianjie Xu | 57dd961 | 2017-08-17 17:50:56 -0700 | [diff] [blame] | 210 | |
| 211 | bool Initialize(const std::string& filename) override; |
| 212 | |
Tianjie Xu | 2903cdd | 2017-08-18 18:15:47 -0700 | [diff] [blame] | 213 | // Initialize a dummy ZipModeImage from an existing ImageChunk vector. For src img pieces, we |
| 214 | // reconstruct a new file_content based on the source ranges; but it's not needed for the tgt img |
| 215 | // pieces; because for each chunk both the data and their offset within the file are unchanged. |
| 216 | void Initialize(const std::vector<ImageChunk>& chunks, const std::vector<uint8_t>& file_content) { |
| 217 | chunks_ = chunks; |
| 218 | file_content_ = file_content; |
| 219 | } |
| 220 | |
Tianjie Xu | 57dd961 | 2017-08-17 17:50:56 -0700 | [diff] [blame] | 221 | // The pesudo source chunk for bsdiff if there's no match for the given target chunk. It's in |
| 222 | // fact the whole source file. |
| 223 | ImageChunk PseudoSource() const; |
| 224 | |
| 225 | // Find the matching deflate source chunk by entry name. Search for normal chunks also if |
| 226 | // |find_normal| is true. |
| 227 | ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false); |
| 228 | |
| 229 | const ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false) const; |
| 230 | |
| 231 | // Verify that we can reconstruct the deflate chunks; also change the type to CHUNK_NORMAL if |
| 232 | // src and tgt are identical. |
| 233 | static bool CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* src_image); |
| 234 | |
| 235 | // Compute the patch between tgt & src images, and write the data into |patch_name|. |
| 236 | static bool GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, |
| 237 | const std::string& patch_name); |
| 238 | |
Tianjie Xu | 2903cdd | 2017-08-18 18:15:47 -0700 | [diff] [blame] | 239 | // Compute the patch based on the lists of split src and tgt images. Generate patches for each |
| 240 | // pair of split pieces and write the data to |patch_name|. If |debug_dir| is specified, write |
| 241 | // each split src data and patch data into that directory. |
| 242 | static bool GeneratePatches(const std::vector<ZipModeImage>& split_tgt_images, |
| 243 | const std::vector<ZipModeImage>& split_src_images, |
| 244 | const std::vector<SortedRangeSet>& split_src_ranges, |
Tianjie Xu | 82582b4 | 2017-08-31 18:05:19 -0700 | [diff] [blame] | 245 | const std::string& patch_name, const std::string& split_info_file, |
| 246 | const std::string& debug_dir); |
Tianjie Xu | 2903cdd | 2017-08-18 18:15:47 -0700 | [diff] [blame] | 247 | |
| 248 | // Split the tgt chunks and src chunks based on the size limit. |
| 249 | static bool SplitZipModeImageWithLimit(const ZipModeImage& tgt_image, |
| 250 | const ZipModeImage& src_image, |
| 251 | std::vector<ZipModeImage>* split_tgt_images, |
| 252 | std::vector<ZipModeImage>* split_src_images, |
| 253 | std::vector<SortedRangeSet>* split_src_ranges); |
| 254 | |
Tianjie Xu | 57dd961 | 2017-08-17 17:50:56 -0700 | [diff] [blame] | 255 | private: |
| 256 | // Initialize image chunks based on the zip entries. |
| 257 | bool InitializeChunks(const std::string& filename, ZipArchiveHandle handle); |
| 258 | // Add the a zip entry to the list. |
| 259 | bool AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, ZipEntry* entry); |
| 260 | // Return the real size of the zip file. (omit the trailing zeros that used for alignment) |
| 261 | bool GetZipFileSize(size_t* input_file_size); |
Tianjie Xu | 2903cdd | 2017-08-18 18:15:47 -0700 | [diff] [blame] | 262 | |
| 263 | static void ValidateSplitImages(const std::vector<ZipModeImage>& split_tgt_images, |
| 264 | const std::vector<ZipModeImage>& split_src_images, |
| 265 | std::vector<SortedRangeSet>& split_src_ranges, |
| 266 | size_t total_tgt_size); |
| 267 | // Construct the dummy split images based on the chunks info and source ranges; and move them into |
Tianjie Xu | 572abbb | 2018-02-22 15:40:39 -0800 | [diff] [blame] | 268 | // the given vectors. Return true if we add a new split image into |split_tgt_images|, and |
| 269 | // false otherwise. |
| 270 | static bool AddSplitImageFromChunkList(const ZipModeImage& tgt_image, |
Tianjie Xu | 2903cdd | 2017-08-18 18:15:47 -0700 | [diff] [blame] | 271 | const ZipModeImage& src_image, |
| 272 | const SortedRangeSet& split_src_ranges, |
| 273 | const std::vector<ImageChunk>& split_tgt_chunks, |
| 274 | const std::vector<ImageChunk>& split_src_chunks, |
| 275 | std::vector<ZipModeImage>* split_tgt_images, |
| 276 | std::vector<ZipModeImage>* split_src_images); |
| 277 | |
| 278 | // Function that actually iterates the tgt_chunks and makes patches. |
| 279 | static bool GeneratePatchesInternal(const ZipModeImage& tgt_image, const ZipModeImage& src_image, |
| 280 | std::vector<PatchChunk>* patch_chunks); |
| 281 | |
| 282 | // size limit in bytes of each chunk. Also, if the length of one zip_entry exceeds the limit, |
| 283 | // we'll split that entry into several smaller chunks in advance. |
| 284 | size_t limit_; |
Tianjie Xu | 57dd961 | 2017-08-17 17:50:56 -0700 | [diff] [blame] | 285 | }; |
| 286 | |
| 287 | class ImageModeImage : public Image { |
| 288 | public: |
| 289 | explicit ImageModeImage(bool is_source) : Image(is_source) {} |
| 290 | |
| 291 | // Initialize the image chunks list by searching the magic numbers in an image file. |
| 292 | bool Initialize(const std::string& filename) override; |
| 293 | |
| 294 | bool SetBonusData(const std::vector<uint8_t>& bonus_data); |
| 295 | |
| 296 | // In Image Mode, verify that the source and target images have the same chunk structure (ie, the |
| 297 | // same sequence of deflate and normal chunks). |
| 298 | static bool CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeImage* src_image); |
| 299 | |
| 300 | // In image mode, generate patches against the given source chunks and bonus_data; write the |
| 301 | // result to |patch_name|. |
| 302 | static bool GeneratePatches(const ImageModeImage& tgt_image, const ImageModeImage& src_image, |
| 303 | const std::string& patch_name); |
| 304 | }; |
| 305 | |
| 306 | #endif // _APPLYPATCH_IMGDIFF_IMAGE_H |