Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2017 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #pragma once |
| 18 | |
| 19 | #include <stddef.h> |
| 20 | |
| 21 | #include <string> |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 22 | #include <utility> |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 23 | #include <vector> |
| 24 | |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 25 | using Range = std::pair<size_t, size_t>; |
| 26 | |
| 27 | class RangeSet { |
| 28 | public: |
| 29 | RangeSet() : blocks_(0) {} |
| 30 | |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 31 | explicit RangeSet(std::vector<Range>&& pairs); |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 32 | |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 33 | // Parses the given string into a RangeSet. Returns the parsed RangeSet, or an empty RangeSet on |
| 34 | // errors. |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 35 | static RangeSet Parse(const std::string& range_text); |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 36 | |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 37 | // Appends the given Range to the current RangeSet. |
| 38 | bool PushBack(Range range); |
| 39 | |
| 40 | // Clears all the ranges from the RangeSet. |
| 41 | void Clear(); |
| 42 | |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 43 | std::string ToString() const; |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 44 | |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 45 | // Gets the block number for the i-th (starting from 0) block in the RangeSet. |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 46 | size_t GetBlockNumber(size_t idx) const; |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 47 | |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 48 | // Returns whether the current RangeSet overlaps with other. RangeSet has half-closed half-open |
| 49 | // bounds. For example, "3,5" contains blocks 3 and 4. So "3,5" and "5,7" are not overlapped. |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 50 | bool Overlaps(const RangeSet& other) const; |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 51 | |
Tao Bao | 160514b | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 52 | // Returns a vector of RangeSets that contain the same set of blocks represented by the current |
| 53 | // RangeSet. The RangeSets in the vector contain similar number of blocks, with a maximum delta |
| 54 | // of 1-block between any two of them. For example, 14 blocks would be split into 4 + 4 + 3 + 3, |
| 55 | // as opposed to 4 + 4 + 4 + 2. If the total number of blocks (T) is less than groups, it |
| 56 | // returns a vector of T 1-block RangeSets. Otherwise the number of the returned RangeSets must |
| 57 | // equal to groups. The current RangeSet remains intact after the split. |
| 58 | std::vector<RangeSet> Split(size_t groups) const; |
| 59 | |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 60 | // Returns the number of Range's in this RangeSet. |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 61 | size_t size() const { |
| 62 | return ranges_.size(); |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 63 | } |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 64 | |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 65 | // Returns the total number of blocks in this RangeSet. |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 66 | size_t blocks() const { |
| 67 | return blocks_; |
| 68 | } |
| 69 | |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 70 | std::vector<Range>::const_iterator cbegin() const { |
| 71 | return ranges_.cbegin(); |
| 72 | } |
| 73 | |
| 74 | std::vector<Range>::const_iterator cend() const { |
| 75 | return ranges_.cend(); |
| 76 | } |
| 77 | |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 78 | std::vector<Range>::iterator begin() { |
| 79 | return ranges_.begin(); |
| 80 | } |
| 81 | |
| 82 | std::vector<Range>::iterator end() { |
| 83 | return ranges_.end(); |
| 84 | } |
| 85 | |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 86 | std::vector<Range>::const_iterator begin() const { |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 87 | return ranges_.begin(); |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 88 | } |
| 89 | |
| 90 | std::vector<Range>::const_iterator end() const { |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 91 | return ranges_.end(); |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 92 | } |
| 93 | |
| 94 | // Reverse const iterators for MoveRange(). |
| 95 | std::vector<Range>::const_reverse_iterator crbegin() const { |
| 96 | return ranges_.crbegin(); |
| 97 | } |
| 98 | |
| 99 | std::vector<Range>::const_reverse_iterator crend() const { |
| 100 | return ranges_.crend(); |
| 101 | } |
| 102 | |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 103 | // Returns whether the RangeSet is valid (i.e. non-empty). |
| 104 | explicit operator bool() const { |
| 105 | return !ranges_.empty(); |
| 106 | } |
| 107 | |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 108 | const Range& operator[](size_t i) const { |
| 109 | return ranges_[i]; |
| 110 | } |
| 111 | |
| 112 | bool operator==(const RangeSet& other) const { |
| 113 | // The orders of Range's matter. "4,1,5,8,10" != "4,8,10,1,5". |
| 114 | return (ranges_ == other.ranges_); |
| 115 | } |
| 116 | |
| 117 | bool operator!=(const RangeSet& other) const { |
| 118 | return ranges_ != other.ranges_; |
| 119 | } |
| 120 | |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 121 | protected: |
Tao Bao | bf5b77d | 2017-03-30 16:57:29 -0700 | [diff] [blame] | 122 | // Actual limit for each value and the total number are both INT_MAX. |
| 123 | std::vector<Range> ranges_; |
| 124 | size_t blocks_; |
Tao Bao | 8f23757 | 2017-03-26 13:36:49 -0700 | [diff] [blame] | 125 | }; |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 126 | |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 127 | // The class is a sorted version of a RangeSet; and it's useful in imgdiff to split the input |
| 128 | // files when we're handling large zip files. Specifically, we can treat the input file as a |
| 129 | // continuous RangeSet (i.e. RangeSet("0-99") for a 100 blocks file); and break it down into |
| 130 | // several smaller chunks based on the zip entries. |
| 131 | |
| 132 | // For example, [source: 0-99] can be split into |
| 133 | // [split_src1: 10-29]; [split_src2: 40-49, 60-69]; [split_src3: 70-89] |
| 134 | // Here "10-29" simply means block 10th to block 29th with respect to the original input file. |
| 135 | // Also, note that the split sources should be mutual exclusive, but they don't need to cover |
| 136 | // every block in the original source. |
| 137 | class SortedRangeSet : public RangeSet { |
| 138 | public: |
Tao Bao | 6798315 | 2017-11-04 00:08:08 -0700 | [diff] [blame] | 139 | // The block size when working with offset and file length. |
| 140 | static constexpr size_t kBlockSize = 4096; |
| 141 | |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 142 | SortedRangeSet() {} |
| 143 | |
| 144 | // Ranges in the the set should be mutually exclusive; and they're sorted by the start block. |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 145 | explicit SortedRangeSet(std::vector<Range>&& pairs); |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 146 | |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 147 | void Insert(const Range& to_insert); |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 148 | |
| 149 | // Insert the input SortedRangeSet; keep the ranges sorted and merge the overlap ranges. |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 150 | void Insert(const SortedRangeSet& rs); |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 151 | |
| 152 | // Compute the block range the file occupies, and insert that range. |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 153 | void Insert(size_t start, size_t len); |
| 154 | |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 155 | using RangeSet::Overlaps; |
| 156 | |
| 157 | bool Overlaps(size_t start, size_t len) const; |
Tianjie Xu | b9e7fc7 | 2017-07-26 16:41:24 -0700 | [diff] [blame] | 158 | |
| 159 | // Given an offset of the file, checks if the corresponding block (by considering the file as |
| 160 | // 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset |
| 161 | // within this SortedRangeSet. |
| 162 | // |
| 163 | // For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte. |
| 164 | // The mapped offset within a SortedRangeSet("1-9 15-19") is 10. |
| 165 | // |
| 166 | // An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th |
| 167 | // item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10 |
| 168 | // + 10) in a range represented by this SortedRangeSet. |
Tao Bao | 4568582 | 2017-10-13 14:54:12 -0700 | [diff] [blame] | 169 | size_t GetOffsetInRangeSet(size_t old_offset) const; |
| 170 | }; |