blob: f224a08be00b88aaf26e51532a99afb81be4e653 [file] [log] [blame]
Tao Bao8f237572017-03-26 13:36:49 -07001/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18
19#include <stddef.h>
20
21#include <string>
Tao Baobf5b77d2017-03-30 16:57:29 -070022#include <utility>
Tao Bao8f237572017-03-26 13:36:49 -070023#include <vector>
24
25#include <android-base/logging.h>
26#include <android-base/parseint.h>
Tianjie Xub9e7fc72017-07-26 16:41:24 -070027#include <android-base/stringprintf.h>
Tao Bao8f237572017-03-26 13:36:49 -070028#include <android-base/strings.h>
29
Tao Baobf5b77d2017-03-30 16:57:29 -070030using Range = std::pair<size_t, size_t>;
31
32class RangeSet {
33 public:
34 RangeSet() : blocks_(0) {}
35
36 explicit RangeSet(std::vector<Range>&& pairs) {
37 CHECK_NE(pairs.size(), static_cast<size_t>(0)) << "Invalid number of tokens";
38
39 // Sanity check the input.
40 size_t result = 0;
41 for (const auto& range : pairs) {
42 CHECK_LT(range.first, range.second)
43 << "Empty or negative range: " << range.first << ", " << range.second;
44 size_t sz = range.second - range.first;
45 CHECK_LE(result, SIZE_MAX - sz) << "RangeSet size overflow";
46 result += sz;
47 }
48
49 ranges_ = pairs;
50 blocks_ = result;
51 }
Tao Bao8f237572017-03-26 13:36:49 -070052
53 static RangeSet Parse(const std::string& range_text) {
54 std::vector<std::string> pieces = android::base::Split(range_text, ",");
55 CHECK_GE(pieces.size(), static_cast<size_t>(3)) << "Invalid range text: " << range_text;
56
57 size_t num;
58 CHECK(android::base::ParseUint(pieces[0], &num, static_cast<size_t>(INT_MAX)))
59 << "Failed to parse the number of tokens: " << range_text;
60
61 CHECK_NE(num, static_cast<size_t>(0)) << "Invalid number of tokens: " << range_text;
62 CHECK_EQ(num % 2, static_cast<size_t>(0)) << "Number of tokens must be even: " << range_text;
63 CHECK_EQ(num, pieces.size() - 1) << "Mismatching number of tokens: " << range_text;
64
Tao Baobf5b77d2017-03-30 16:57:29 -070065 std::vector<Range> pairs;
Tao Bao8f237572017-03-26 13:36:49 -070066 for (size_t i = 0; i < num; i += 2) {
Tao Baobf5b77d2017-03-30 16:57:29 -070067 size_t first;
68 CHECK(android::base::ParseUint(pieces[i + 1], &first, static_cast<size_t>(INT_MAX)));
69 size_t second;
70 CHECK(android::base::ParseUint(pieces[i + 2], &second, static_cast<size_t>(INT_MAX)));
Tao Bao8f237572017-03-26 13:36:49 -070071
Tao Baobf5b77d2017-03-30 16:57:29 -070072 pairs.emplace_back(first, second);
Tao Bao8f237572017-03-26 13:36:49 -070073 }
74
Tao Baobf5b77d2017-03-30 16:57:29 -070075 return RangeSet(std::move(pairs));
Tao Bao8f237572017-03-26 13:36:49 -070076 }
77
Tianjie Xub9e7fc72017-07-26 16:41:24 -070078 std::string ToString() const {
79 if (ranges_.empty()) {
80 return "";
81 }
82 std::string result = std::to_string(ranges_.size() * 2);
83 for (const auto& r : ranges_) {
84 result += android::base::StringPrintf(",%zu,%zu", r.first, r.second);
85 }
86
87 return result;
88 }
89
Tao Bao8f237572017-03-26 13:36:49 -070090 // Get the block number for the i-th (starting from 0) block in the RangeSet.
91 size_t GetBlockNumber(size_t idx) const {
Tao Baobf5b77d2017-03-30 16:57:29 -070092 CHECK_LT(idx, blocks_) << "Out of bound index " << idx << " (total blocks: " << blocks_ << ")";
93
94 for (const auto& range : ranges_) {
95 if (idx < range.second - range.first) {
96 return range.first + idx;
Tao Bao8f237572017-03-26 13:36:49 -070097 }
Tao Baobf5b77d2017-03-30 16:57:29 -070098 idx -= (range.second - range.first);
Tao Bao8f237572017-03-26 13:36:49 -070099 }
Tao Baobf5b77d2017-03-30 16:57:29 -0700100
101 CHECK(false) << "Failed to find block number for index " << idx;
Tao Bao8f237572017-03-26 13:36:49 -0700102 return 0; // Unreachable, but to make compiler happy.
103 }
104
105 // RangeSet has half-closed half-open bounds. For example, "3,5" contains blocks 3 and 4. So "3,5"
106 // and "5,7" are not overlapped.
107 bool Overlaps(const RangeSet& other) const {
Tao Baobf5b77d2017-03-30 16:57:29 -0700108 for (const auto& range : ranges_) {
109 size_t start = range.first;
110 size_t end = range.second;
111 for (const auto& other_range : other.ranges_) {
112 size_t other_start = other_range.first;
113 size_t other_end = other_range.second;
Tao Bao8f237572017-03-26 13:36:49 -0700114 // [start, end) vs [other_start, other_end)
115 if (!(other_start >= end || start >= other_end)) {
116 return true;
117 }
118 }
119 }
120 return false;
121 }
122
Tao Baobf5b77d2017-03-30 16:57:29 -0700123 // size() gives the number of Range's in this RangeSet.
124 size_t size() const {
125 return ranges_.size();
Tao Bao8f237572017-03-26 13:36:49 -0700126 }
Tao Baobf5b77d2017-03-30 16:57:29 -0700127
128 // blocks() gives the number of all blocks in this RangeSet.
129 size_t blocks() const {
130 return blocks_;
131 }
132
133 // We provide const iterators only.
134 std::vector<Range>::const_iterator cbegin() const {
135 return ranges_.cbegin();
136 }
137
138 std::vector<Range>::const_iterator cend() const {
139 return ranges_.cend();
140 }
141
142 // Need to provide begin()/end() since range-based loop expects begin()/end().
143 std::vector<Range>::const_iterator begin() const {
144 return ranges_.cbegin();
145 }
146
147 std::vector<Range>::const_iterator end() const {
148 return ranges_.cend();
149 }
150
151 // Reverse const iterators for MoveRange().
152 std::vector<Range>::const_reverse_iterator crbegin() const {
153 return ranges_.crbegin();
154 }
155
156 std::vector<Range>::const_reverse_iterator crend() const {
157 return ranges_.crend();
158 }
159
160 const Range& operator[](size_t i) const {
161 return ranges_[i];
162 }
163
164 bool operator==(const RangeSet& other) const {
165 // The orders of Range's matter. "4,1,5,8,10" != "4,8,10,1,5".
166 return (ranges_ == other.ranges_);
167 }
168
169 bool operator!=(const RangeSet& other) const {
170 return ranges_ != other.ranges_;
171 }
172
Tianjie Xub9e7fc72017-07-26 16:41:24 -0700173 protected:
Tao Baobf5b77d2017-03-30 16:57:29 -0700174 // Actual limit for each value and the total number are both INT_MAX.
175 std::vector<Range> ranges_;
176 size_t blocks_;
Tao Bao8f237572017-03-26 13:36:49 -0700177};
Tianjie Xub9e7fc72017-07-26 16:41:24 -0700178
179static constexpr size_t kBlockSize = 4096;
180
181// The class is a sorted version of a RangeSet; and it's useful in imgdiff to split the input
182// files when we're handling large zip files. Specifically, we can treat the input file as a
183// continuous RangeSet (i.e. RangeSet("0-99") for a 100 blocks file); and break it down into
184// several smaller chunks based on the zip entries.
185
186// For example, [source: 0-99] can be split into
187// [split_src1: 10-29]; [split_src2: 40-49, 60-69]; [split_src3: 70-89]
188// Here "10-29" simply means block 10th to block 29th with respect to the original input file.
189// Also, note that the split sources should be mutual exclusive, but they don't need to cover
190// every block in the original source.
191class SortedRangeSet : public RangeSet {
192 public:
193 SortedRangeSet() {}
194
195 // Ranges in the the set should be mutually exclusive; and they're sorted by the start block.
196 explicit SortedRangeSet(std::vector<Range>&& pairs) : RangeSet(std::move(pairs)) {
197 std::sort(ranges_.begin(), ranges_.end());
198 }
199
200 void Insert(const Range& to_insert) {
201 SortedRangeSet rs({ to_insert });
202 Insert(rs);
203 }
204
205 // Insert the input SortedRangeSet; keep the ranges sorted and merge the overlap ranges.
206 void Insert(const SortedRangeSet& rs) {
207 if (rs.size() == 0) {
208 return;
209 }
210 // Merge and sort the two RangeSets.
211 std::vector<Range> temp = std::move(ranges_);
212 std::copy(rs.begin(), rs.end(), std::back_inserter(temp));
213 std::sort(temp.begin(), temp.end());
214
215 Clear();
216 // Trim overlaps and insert the result back to ranges_.
217 Range to_insert = temp.front();
218 for (auto it = temp.cbegin() + 1; it != temp.cend(); it++) {
219 if (it->first <= to_insert.second) {
220 to_insert.second = std::max(to_insert.second, it->second);
221 } else {
222 ranges_.push_back(to_insert);
223 blocks_ += (to_insert.second - to_insert.first);
224 to_insert = *it;
225 }
226 }
227 ranges_.push_back(to_insert);
228 blocks_ += (to_insert.second - to_insert.first);
229 }
230
231 void Clear() {
232 blocks_ = 0;
233 ranges_.clear();
234 }
235
236 using RangeSet::Overlaps;
237 bool Overlaps(size_t start, size_t len) const {
238 RangeSet rs({ { start / kBlockSize, (start + len - 1) / kBlockSize + 1 } });
239 return Overlaps(rs);
240 }
241
242 // Compute the block range the file occupies, and insert that range.
243 void Insert(size_t start, size_t len) {
244 Range to_insert{ start / kBlockSize, (start + len - 1) / kBlockSize + 1 };
245 Insert(to_insert);
246 }
247
248 // Given an offset of the file, checks if the corresponding block (by considering the file as
249 // 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset
250 // within this SortedRangeSet.
251 //
252 // For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte.
253 // The mapped offset within a SortedRangeSet("1-9 15-19") is 10.
254 //
255 // An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th
256 // item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10
257 // + 10) in a range represented by this SortedRangeSet.
258 size_t GetOffsetInRangeSet(size_t old_offset) const {
259 size_t old_block_start = old_offset / kBlockSize;
260 size_t new_block_start = 0;
261 for (const auto& range : ranges_) {
262 // Find the index of old_block_start.
263 if (old_block_start >= range.second) {
264 new_block_start += (range.second - range.first);
265 } else if (old_block_start >= range.first) {
266 new_block_start += (old_block_start - range.first);
267 return (new_block_start * kBlockSize + old_offset % kBlockSize);
268 } else {
Tianjie Xu57dd9612017-08-17 17:50:56 -0700269 CHECK(false) << "block_start " << old_block_start
270 << " is missing between two ranges: " << this->ToString();
Tianjie Xub9e7fc72017-07-26 16:41:24 -0700271 return 0;
272 }
273 }
Tianjie Xu57dd9612017-08-17 17:50:56 -0700274 CHECK(false) << "block_start " << old_block_start
275 << " exceeds the limit of current RangeSet: " << this->ToString();
Tianjie Xub9e7fc72017-07-26 16:41:24 -0700276 return 0;
277 }
278};