Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2017 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "otautil/rangeset.h" |
| 18 | |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 19 | #include <limits.h> |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 20 | #include <stddef.h> |
| 21 | |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 22 | #include <algorithm> |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 23 | #include <string> |
| 24 | #include <utility> |
| 25 | #include <vector> |
| 26 | |
| 27 | #include <android-base/logging.h> |
| 28 | #include <android-base/parseint.h> |
| 29 | #include <android-base/stringprintf.h> |
| 30 | #include <android-base/strings.h> |
| 31 | |
| 32 | RangeSet::RangeSet(std::vector<Range>&& pairs) { |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 33 | blocks_ = 0; |
| 34 | if (pairs.empty()) { |
| 35 | LOG(ERROR) << "Invalid number of tokens"; |
| 36 | return; |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 37 | } |
| 38 | |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 39 | for (const auto& range : pairs) { |
| 40 | if (!PushBack(range)) { |
| 41 | Clear(); |
| 42 | return; |
| 43 | } |
| 44 | } |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 45 | } |
| 46 | |
| 47 | RangeSet RangeSet::Parse(const std::string& range_text) { |
| 48 | std::vector<std::string> pieces = android::base::Split(range_text, ","); |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 49 | if (pieces.size() < 3) { |
| 50 | LOG(ERROR) << "Invalid range text: " << range_text; |
| 51 | return {}; |
| 52 | } |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 53 | |
| 54 | size_t num; |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 55 | if (!android::base::ParseUint(pieces[0], &num, static_cast<size_t>(INT_MAX))) { |
| 56 | LOG(ERROR) << "Failed to parse the number of tokens: " << range_text; |
| 57 | return {}; |
| 58 | } |
| 59 | if (num == 0) { |
| 60 | LOG(ERROR) << "Invalid number of tokens: " << range_text; |
| 61 | return {}; |
| 62 | } |
| 63 | if (num % 2 != 0) { |
| 64 | LOG(ERROR) << "Number of tokens must be even: " << range_text; |
| 65 | return {}; |
| 66 | } |
| 67 | if (num != pieces.size() - 1) { |
| 68 | LOG(ERROR) << "Mismatching number of tokens: " << range_text; |
| 69 | return {}; |
| 70 | } |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 71 | |
| 72 | std::vector<Range> pairs; |
| 73 | for (size_t i = 0; i < num; i += 2) { |
| 74 | size_t first; |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 75 | size_t second; |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 76 | if (!android::base::ParseUint(pieces[i + 1], &first, static_cast<size_t>(INT_MAX)) || |
| 77 | !android::base::ParseUint(pieces[i + 2], &second, static_cast<size_t>(INT_MAX))) { |
| 78 | return {}; |
| 79 | } |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 80 | pairs.emplace_back(first, second); |
| 81 | } |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 82 | return RangeSet(std::move(pairs)); |
| 83 | } |
| 84 | |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 85 | bool RangeSet::PushBack(Range range) { |
| 86 | if (range.first >= range.second) { |
| 87 | LOG(ERROR) << "Empty or negative range: " << range.first << ", " << range.second; |
| 88 | return false; |
| 89 | } |
| 90 | size_t sz = range.second - range.first; |
| 91 | if (blocks_ >= SIZE_MAX - sz) { |
| 92 | LOG(ERROR) << "RangeSet size overflow"; |
| 93 | return false; |
| 94 | } |
| 95 | |
| 96 | ranges_.push_back(std::move(range)); |
| 97 | blocks_ += sz; |
| 98 | return true; |
| 99 | } |
| 100 | |
| 101 | void RangeSet::Clear() { |
| 102 | ranges_.clear(); |
| 103 | blocks_ = 0; |
| 104 | } |
| 105 | |
Tao Bao | 160514b | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 106 | std::vector<RangeSet> RangeSet::Split(size_t groups) const { |
| 107 | if (ranges_.empty() || groups == 0) return {}; |
| 108 | |
| 109 | if (blocks_ < groups) { |
| 110 | groups = blocks_; |
| 111 | } |
| 112 | |
| 113 | // Evenly distribute blocks, with the first few groups possibly containing one more. |
| 114 | size_t mean = blocks_ / groups; |
| 115 | std::vector<size_t> blocks_per_group(groups, mean); |
| 116 | std::fill_n(blocks_per_group.begin(), blocks_ % groups, mean + 1); |
| 117 | |
| 118 | std::vector<RangeSet> result; |
| 119 | |
| 120 | // Forward iterate Ranges and fill up each group with the desired number of blocks. |
| 121 | auto it = ranges_.cbegin(); |
| 122 | Range range = *it; |
| 123 | for (const auto& blocks : blocks_per_group) { |
| 124 | RangeSet buffer; |
| 125 | size_t needed = blocks; |
| 126 | while (needed > 0) { |
| 127 | size_t range_blocks = range.second - range.first; |
| 128 | if (range_blocks > needed) { |
| 129 | // Split the current range and don't advance the iterator. |
| 130 | buffer.PushBack({ range.first, range.first + needed }); |
| 131 | range.first = range.first + needed; |
| 132 | break; |
| 133 | } |
| 134 | buffer.PushBack(range); |
| 135 | it++; |
| 136 | if (it != ranges_.cend()) { |
| 137 | range = *it; |
| 138 | } |
| 139 | needed -= range_blocks; |
| 140 | } |
| 141 | result.push_back(std::move(buffer)); |
| 142 | } |
| 143 | return result; |
| 144 | } |
| 145 | |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 146 | std::string RangeSet::ToString() const { |
| 147 | if (ranges_.empty()) { |
| 148 | return ""; |
| 149 | } |
| 150 | std::string result = std::to_string(ranges_.size() * 2); |
Tao Bao | 43bfa6e | 2018-08-28 10:09:13 -0700 | [diff] [blame] | 151 | for (const auto& [begin, end] : ranges_) { |
| 152 | result += android::base::StringPrintf(",%zu,%zu", begin, end); |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 153 | } |
| 154 | |
| 155 | return result; |
| 156 | } |
| 157 | |
| 158 | // Get the block number for the i-th (starting from 0) block in the RangeSet. |
| 159 | size_t RangeSet::GetBlockNumber(size_t idx) const { |
| 160 | CHECK_LT(idx, blocks_) << "Out of bound index " << idx << " (total blocks: " << blocks_ << ")"; |
| 161 | |
Tao Bao | 43bfa6e | 2018-08-28 10:09:13 -0700 | [diff] [blame] | 162 | for (const auto& [begin, end] : ranges_) { |
| 163 | if (idx < end - begin) { |
| 164 | return begin + idx; |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 165 | } |
Tao Bao | 43bfa6e | 2018-08-28 10:09:13 -0700 | [diff] [blame] | 166 | idx -= (end - begin); |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 167 | } |
| 168 | |
| 169 | CHECK(false) << "Failed to find block number for index " << idx; |
| 170 | return 0; // Unreachable, but to make compiler happy. |
| 171 | } |
| 172 | |
| 173 | // RangeSet has half-closed half-open bounds. For example, "3,5" contains blocks 3 and 4. So "3,5" |
| 174 | // and "5,7" are not overlapped. |
| 175 | bool RangeSet::Overlaps(const RangeSet& other) const { |
Tao Bao | 43bfa6e | 2018-08-28 10:09:13 -0700 | [diff] [blame] | 176 | for (const auto& [begin, end] : ranges_) { |
| 177 | for (const auto& [other_begin, other_end] : other.ranges_) { |
| 178 | // [begin, end) vs [other_begin, other_end) |
| 179 | if (!(other_begin >= end || begin >= other_end)) { |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 180 | return true; |
| 181 | } |
| 182 | } |
| 183 | } |
| 184 | return false; |
| 185 | } |
| 186 | |
xunchang | 311e6ca | 2019-03-22 08:54:35 -0700 | [diff] [blame] | 187 | std::optional<RangeSet> RangeSet::GetSubRanges(size_t start_index, size_t num_of_blocks) const { |
| 188 | size_t end_index = start_index + num_of_blocks; // The index of final block to read plus one |
| 189 | if (start_index > end_index || end_index > blocks_) { |
| 190 | LOG(ERROR) << "Failed to get the sub ranges for start_index " << start_index |
| 191 | << " num_of_blocks " << num_of_blocks |
| 192 | << " total number of blocks the range contains is " << blocks_; |
| 193 | return std::nullopt; |
| 194 | } |
| 195 | |
| 196 | if (num_of_blocks == 0) { |
| 197 | LOG(WARNING) << "num_of_blocks is zero when calling GetSubRanges()"; |
| 198 | return RangeSet(); |
| 199 | } |
| 200 | |
| 201 | RangeSet result; |
| 202 | size_t current_index = 0; |
| 203 | for (const auto& [range_start, range_end] : ranges_) { |
| 204 | CHECK_LT(range_start, range_end); |
| 205 | size_t blocks_in_range = range_end - range_start; |
| 206 | // Linear search to skip the ranges until we reach start_block. |
| 207 | if (current_index + blocks_in_range <= start_index) { |
| 208 | current_index += blocks_in_range; |
| 209 | continue; |
| 210 | } |
| 211 | |
| 212 | size_t trimmed_range_start = range_start; |
| 213 | // We have found the first block range to read, trim the heading blocks. |
| 214 | if (current_index < start_index) { |
| 215 | trimmed_range_start += start_index - current_index; |
| 216 | } |
| 217 | // Trim the trailing blocks if the last range has more blocks than desired; also return the |
| 218 | // result. |
| 219 | if (current_index + blocks_in_range >= end_index) { |
| 220 | size_t trimmed_range_end = range_end - (current_index + blocks_in_range - end_index); |
| 221 | if (!result.PushBack({ trimmed_range_start, trimmed_range_end })) { |
| 222 | return std::nullopt; |
| 223 | } |
| 224 | |
| 225 | return result; |
| 226 | } |
| 227 | |
| 228 | if (!result.PushBack({ trimmed_range_start, range_end })) { |
| 229 | return std::nullopt; |
| 230 | } |
| 231 | current_index += blocks_in_range; |
| 232 | } |
| 233 | |
| 234 | LOG(ERROR) << "Failed to construct byte ranges to read, start_block: " << start_index |
| 235 | << ", num_of_blocks: " << num_of_blocks << " total number of blocks: " << blocks_; |
| 236 | return std::nullopt; |
| 237 | } |
| 238 | |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 239 | // Ranges in the the set should be mutually exclusive; and they're sorted by the start block. |
| 240 | SortedRangeSet::SortedRangeSet(std::vector<Range>&& pairs) : RangeSet(std::move(pairs)) { |
| 241 | std::sort(ranges_.begin(), ranges_.end()); |
| 242 | } |
| 243 | |
| 244 | void SortedRangeSet::Insert(const Range& to_insert) { |
| 245 | SortedRangeSet rs({ to_insert }); |
| 246 | Insert(rs); |
| 247 | } |
| 248 | |
| 249 | // Insert the input SortedRangeSet; keep the ranges sorted and merge the overlap ranges. |
| 250 | void SortedRangeSet::Insert(const SortedRangeSet& rs) { |
| 251 | if (rs.size() == 0) { |
| 252 | return; |
| 253 | } |
| 254 | // Merge and sort the two RangeSets. |
| 255 | std::vector<Range> temp = std::move(ranges_); |
| 256 | std::copy(rs.begin(), rs.end(), std::back_inserter(temp)); |
| 257 | std::sort(temp.begin(), temp.end()); |
| 258 | |
| 259 | Clear(); |
| 260 | // Trim overlaps and insert the result back to ranges_. |
| 261 | Range to_insert = temp.front(); |
| 262 | for (auto it = temp.cbegin() + 1; it != temp.cend(); it++) { |
| 263 | if (it->first <= to_insert.second) { |
| 264 | to_insert.second = std::max(to_insert.second, it->second); |
| 265 | } else { |
| 266 | ranges_.push_back(to_insert); |
| 267 | blocks_ += (to_insert.second - to_insert.first); |
| 268 | to_insert = *it; |
| 269 | } |
| 270 | } |
| 271 | ranges_.push_back(to_insert); |
| 272 | blocks_ += (to_insert.second - to_insert.first); |
| 273 | } |
| 274 | |
| 275 | // Compute the block range the file occupies, and insert that range. |
| 276 | void SortedRangeSet::Insert(size_t start, size_t len) { |
| 277 | Range to_insert{ start / kBlockSize, (start + len - 1) / kBlockSize + 1 }; |
| 278 | Insert(to_insert); |
| 279 | } |
| 280 | |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 281 | bool SortedRangeSet::Overlaps(size_t start, size_t len) const { |
| 282 | RangeSet rs({ { start / kBlockSize, (start + len - 1) / kBlockSize + 1 } }); |
| 283 | return Overlaps(rs); |
| 284 | } |
| 285 | |
| 286 | // Given an offset of the file, checks if the corresponding block (by considering the file as |
| 287 | // 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset |
| 288 | // within this SortedRangeSet. |
| 289 | // |
| 290 | // For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte. |
| 291 | // The mapped offset within a SortedRangeSet("1-9 15-19") is 10. |
| 292 | // |
| 293 | // An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th |
| 294 | // item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10 |
| 295 | // + 10) in a range represented by this SortedRangeSet. |
| 296 | size_t SortedRangeSet::GetOffsetInRangeSet(size_t old_offset) const { |
| 297 | size_t old_block_start = old_offset / kBlockSize; |
| 298 | size_t new_block_start = 0; |
Tao Bao | 43bfa6e | 2018-08-28 10:09:13 -0700 | [diff] [blame] | 299 | for (const auto& [start, end] : ranges_) { |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 300 | // Find the index of old_block_start. |
Tao Bao | 43bfa6e | 2018-08-28 10:09:13 -0700 | [diff] [blame] | 301 | if (old_block_start >= end) { |
| 302 | new_block_start += (end - start); |
| 303 | } else if (old_block_start >= start) { |
| 304 | new_block_start += (old_block_start - start); |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 305 | return (new_block_start * kBlockSize + old_offset % kBlockSize); |
| 306 | } else { |
| 307 | CHECK(false) << "block_start " << old_block_start |
Tao Bao | 43bfa6e | 2018-08-28 10:09:13 -0700 | [diff] [blame] | 308 | << " is missing between two ranges: " << ToString(); |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 309 | return 0; |
| 310 | } |
| 311 | } |
| 312 | CHECK(false) << "block_start " << old_block_start |
Tao Bao | 43bfa6e | 2018-08-28 10:09:13 -0700 | [diff] [blame] | 313 | << " exceeds the limit of current RangeSet: " << ToString(); |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 314 | return 0; |
| 315 | } |