Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2017 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #pragma once |
| 18 | |
| 19 | #include <stddef.h> |
| 20 | |
| 21 | #include <string> |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 22 | #include <utility> |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 23 | #include <vector> |
| 24 | |
| 25 | #include <android-base/logging.h> |
| 26 | #include <android-base/parseint.h> |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 27 | #include <android-base/stringprintf.h> |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 28 | #include <android-base/strings.h> |
| 29 | |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 30 | using Range = std::pair<size_t, size_t>; |
| 31 | |
| 32 | class RangeSet { |
| 33 | public: |
| 34 | RangeSet() : blocks_(0) {} |
| 35 | |
| 36 | explicit RangeSet(std::vector<Range>&& pairs) { |
| 37 | CHECK_NE(pairs.size(), static_cast<size_t>(0)) << "Invalid number of tokens"; |
| 38 | |
| 39 | // Sanity check the input. |
| 40 | size_t result = 0; |
| 41 | for (const auto& range : pairs) { |
| 42 | CHECK_LT(range.first, range.second) |
| 43 | << "Empty or negative range: " << range.first << ", " << range.second; |
| 44 | size_t sz = range.second - range.first; |
| 45 | CHECK_LE(result, SIZE_MAX - sz) << "RangeSet size overflow"; |
| 46 | result += sz; |
| 47 | } |
| 48 | |
| 49 | ranges_ = pairs; |
| 50 | blocks_ = result; |
| 51 | } |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 52 | |
| 53 | static RangeSet Parse(const std::string& range_text) { |
| 54 | std::vector<std::string> pieces = android::base::Split(range_text, ","); |
| 55 | CHECK_GE(pieces.size(), static_cast<size_t>(3)) << "Invalid range text: " << range_text; |
| 56 | |
| 57 | size_t num; |
| 58 | CHECK(android::base::ParseUint(pieces[0], &num, static_cast<size_t>(INT_MAX))) |
| 59 | << "Failed to parse the number of tokens: " << range_text; |
| 60 | |
| 61 | CHECK_NE(num, static_cast<size_t>(0)) << "Invalid number of tokens: " << range_text; |
| 62 | CHECK_EQ(num % 2, static_cast<size_t>(0)) << "Number of tokens must be even: " << range_text; |
| 63 | CHECK_EQ(num, pieces.size() - 1) << "Mismatching number of tokens: " << range_text; |
| 64 | |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 65 | std::vector<Range> pairs; |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 66 | for (size_t i = 0; i < num; i += 2) { |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 67 | size_t first; |
| 68 | CHECK(android::base::ParseUint(pieces[i + 1], &first, static_cast<size_t>(INT_MAX))); |
| 69 | size_t second; |
| 70 | CHECK(android::base::ParseUint(pieces[i + 2], &second, static_cast<size_t>(INT_MAX))); |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 71 | |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 72 | pairs.emplace_back(first, second); |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 73 | } |
| 74 | |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 75 | return RangeSet(std::move(pairs)); |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 76 | } |
| 77 | |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 78 | std::string ToString() const { |
| 79 | if (ranges_.empty()) { |
| 80 | return ""; |
| 81 | } |
| 82 | std::string result = std::to_string(ranges_.size() * 2); |
| 83 | for (const auto& r : ranges_) { |
| 84 | result += android::base::StringPrintf(",%zu,%zu", r.first, r.second); |
| 85 | } |
| 86 | |
| 87 | return result; |
| 88 | } |
| 89 | |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 90 | // Get the block number for the i-th (starting from 0) block in the RangeSet. |
| 91 | size_t GetBlockNumber(size_t idx) const { |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 92 | CHECK_LT(idx, blocks_) << "Out of bound index " << idx << " (total blocks: " << blocks_ << ")"; |
| 93 | |
| 94 | for (const auto& range : ranges_) { |
| 95 | if (idx < range.second - range.first) { |
| 96 | return range.first + idx; |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 97 | } |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 98 | idx -= (range.second - range.first); |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 99 | } |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 100 | |
| 101 | CHECK(false) << "Failed to find block number for index " << idx; |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 102 | return 0; // Unreachable, but to make compiler happy. |
| 103 | } |
| 104 | |
| 105 | // RangeSet has half-closed half-open bounds. For example, "3,5" contains blocks 3 and 4. So "3,5" |
| 106 | // and "5,7" are not overlapped. |
| 107 | bool Overlaps(const RangeSet& other) const { |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 108 | for (const auto& range : ranges_) { |
| 109 | size_t start = range.first; |
| 110 | size_t end = range.second; |
| 111 | for (const auto& other_range : other.ranges_) { |
| 112 | size_t other_start = other_range.first; |
| 113 | size_t other_end = other_range.second; |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 114 | // [start, end) vs [other_start, other_end) |
| 115 | if (!(other_start >= end || start >= other_end)) { |
| 116 | return true; |
| 117 | } |
| 118 | } |
| 119 | } |
| 120 | return false; |
| 121 | } |
| 122 | |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 123 | // size() gives the number of Range's in this RangeSet. |
| 124 | size_t size() const { |
| 125 | return ranges_.size(); |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 126 | } |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 127 | |
| 128 | // blocks() gives the number of all blocks in this RangeSet. |
| 129 | size_t blocks() const { |
| 130 | return blocks_; |
| 131 | } |
| 132 | |
| 133 | // We provide const iterators only. |
| 134 | std::vector<Range>::const_iterator cbegin() const { |
| 135 | return ranges_.cbegin(); |
| 136 | } |
| 137 | |
| 138 | std::vector<Range>::const_iterator cend() const { |
| 139 | return ranges_.cend(); |
| 140 | } |
| 141 | |
| 142 | // Need to provide begin()/end() since range-based loop expects begin()/end(). |
| 143 | std::vector<Range>::const_iterator begin() const { |
| 144 | return ranges_.cbegin(); |
| 145 | } |
| 146 | |
| 147 | std::vector<Range>::const_iterator end() const { |
| 148 | return ranges_.cend(); |
| 149 | } |
| 150 | |
| 151 | // Reverse const iterators for MoveRange(). |
| 152 | std::vector<Range>::const_reverse_iterator crbegin() const { |
| 153 | return ranges_.crbegin(); |
| 154 | } |
| 155 | |
| 156 | std::vector<Range>::const_reverse_iterator crend() const { |
| 157 | return ranges_.crend(); |
| 158 | } |
| 159 | |
| 160 | const Range& operator[](size_t i) const { |
| 161 | return ranges_[i]; |
| 162 | } |
| 163 | |
| 164 | bool operator==(const RangeSet& other) const { |
| 165 | // The orders of Range's matter. "4,1,5,8,10" != "4,8,10,1,5". |
| 166 | return (ranges_ == other.ranges_); |
| 167 | } |
| 168 | |
| 169 | bool operator!=(const RangeSet& other) const { |
| 170 | return ranges_ != other.ranges_; |
| 171 | } |
| 172 | |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 173 | protected: |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 174 | // Actual limit for each value and the total number are both INT_MAX. |
| 175 | std::vector<Range> ranges_; |
| 176 | size_t blocks_; |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 177 | }; |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 178 | |
| 179 | static constexpr size_t kBlockSize = 4096; |
| 180 | |
| 181 | // The class is a sorted version of a RangeSet; and it's useful in imgdiff to split the input |
| 182 | // files when we're handling large zip files. Specifically, we can treat the input file as a |
| 183 | // continuous RangeSet (i.e. RangeSet("0-99") for a 100 blocks file); and break it down into |
| 184 | // several smaller chunks based on the zip entries. |
| 185 | |
| 186 | // For example, [source: 0-99] can be split into |
| 187 | // [split_src1: 10-29]; [split_src2: 40-49, 60-69]; [split_src3: 70-89] |
| 188 | // Here "10-29" simply means block 10th to block 29th with respect to the original input file. |
| 189 | // Also, note that the split sources should be mutual exclusive, but they don't need to cover |
| 190 | // every block in the original source. |
| 191 | class SortedRangeSet : public RangeSet { |
| 192 | public: |
| 193 | SortedRangeSet() {} |
| 194 | |
| 195 | // Ranges in the the set should be mutually exclusive; and they're sorted by the start block. |
| 196 | explicit SortedRangeSet(std::vector<Range>&& pairs) : RangeSet(std::move(pairs)) { |
| 197 | std::sort(ranges_.begin(), ranges_.end()); |
| 198 | } |
| 199 | |
| 200 | void Insert(const Range& to_insert) { |
| 201 | SortedRangeSet rs({ to_insert }); |
| 202 | Insert(rs); |
| 203 | } |
| 204 | |
| 205 | // Insert the input SortedRangeSet; keep the ranges sorted and merge the overlap ranges. |
| 206 | void Insert(const SortedRangeSet& rs) { |
| 207 | if (rs.size() == 0) { |
| 208 | return; |
| 209 | } |
| 210 | // Merge and sort the two RangeSets. |
| 211 | std::vector<Range> temp = std::move(ranges_); |
| 212 | std::copy(rs.begin(), rs.end(), std::back_inserter(temp)); |
| 213 | std::sort(temp.begin(), temp.end()); |
| 214 | |
| 215 | Clear(); |
| 216 | // Trim overlaps and insert the result back to ranges_. |
| 217 | Range to_insert = temp.front(); |
| 218 | for (auto it = temp.cbegin() + 1; it != temp.cend(); it++) { |
| 219 | if (it->first <= to_insert.second) { |
| 220 | to_insert.second = std::max(to_insert.second, it->second); |
| 221 | } else { |
| 222 | ranges_.push_back(to_insert); |
| 223 | blocks_ += (to_insert.second - to_insert.first); |
| 224 | to_insert = *it; |
| 225 | } |
| 226 | } |
| 227 | ranges_.push_back(to_insert); |
| 228 | blocks_ += (to_insert.second - to_insert.first); |
| 229 | } |
| 230 | |
| 231 | void Clear() { |
| 232 | blocks_ = 0; |
| 233 | ranges_.clear(); |
| 234 | } |
| 235 | |
| 236 | using RangeSet::Overlaps; |
| 237 | bool Overlaps(size_t start, size_t len) const { |
| 238 | RangeSet rs({ { start / kBlockSize, (start + len - 1) / kBlockSize + 1 } }); |
| 239 | return Overlaps(rs); |
| 240 | } |
| 241 | |
| 242 | // Compute the block range the file occupies, and insert that range. |
| 243 | void Insert(size_t start, size_t len) { |
| 244 | Range to_insert{ start / kBlockSize, (start + len - 1) / kBlockSize + 1 }; |
| 245 | Insert(to_insert); |
| 246 | } |
| 247 | |
| 248 | // Given an offset of the file, checks if the corresponding block (by considering the file as |
| 249 | // 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset |
| 250 | // within this SortedRangeSet. |
| 251 | // |
| 252 | // For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte. |
| 253 | // The mapped offset within a SortedRangeSet("1-9 15-19") is 10. |
| 254 | // |
| 255 | // An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th |
| 256 | // item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10 |
| 257 | // + 10) in a range represented by this SortedRangeSet. |
| 258 | size_t GetOffsetInRangeSet(size_t old_offset) const { |
| 259 | size_t old_block_start = old_offset / kBlockSize; |
| 260 | size_t new_block_start = 0; |
| 261 | for (const auto& range : ranges_) { |
| 262 | // Find the index of old_block_start. |
| 263 | if (old_block_start >= range.second) { |
| 264 | new_block_start += (range.second - range.first); |
| 265 | } else if (old_block_start >= range.first) { |
| 266 | new_block_start += (old_block_start - range.first); |
| 267 | return (new_block_start * kBlockSize + old_offset % kBlockSize); |
| 268 | } else { |
Tianjie Xu | 57dd961 | 2017-08-17 17:50:56 -0700 | [diff] [blame] | 269 | CHECK(false) << "block_start " << old_block_start |
| 270 | << " is missing between two ranges: " << this->ToString(); |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 271 | return 0; |
| 272 | } |
| 273 | } |
Tianjie Xu | 57dd961 | 2017-08-17 17:50:56 -0700 | [diff] [blame] | 274 | CHECK(false) << "block_start " << old_block_start |
| 275 | << " exceeds the limit of current RangeSet: " << this->ToString(); |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 276 | return 0; |
| 277 | } |
| 278 | }; |