Merge "Add implemention of SortedRangeSet" am: 64cba55fbc am: fbd4b10310 am: a09376ed90 am: 8b09001e47
am: fd0e740c61
Change-Id: I1a150ab61fd631252faa774455cd3aca83e8dd4c
diff --git a/tests/unit/rangeset_test.cpp b/tests/unit/rangeset_test.cpp
index 3c6d77e..3993cb9 100644
--- a/tests/unit/rangeset_test.cpp
+++ b/tests/unit/rangeset_test.cpp
@@ -110,3 +110,50 @@
}
ASSERT_EQ((std::vector<Range>{ Range{ 8, 10 }, Range{ 1, 5 } }), ranges);
}
+
+TEST(RangeSetTest, tostring) {
+ ASSERT_EQ("2,1,6", RangeSet::Parse("2,1,6").ToString());
+ ASSERT_EQ("4,1,5,8,10", RangeSet::Parse("4,1,5,8,10").ToString());
+ ASSERT_EQ("6,1,3,4,6,15,22", RangeSet::Parse("6,1,3,4,6,15,22").ToString());
+}
+
+TEST(SortedRangeSetTest, insertion) {
+ SortedRangeSet rs({ { 2, 3 }, { 4, 6 }, { 8, 14 } });
+ rs.Insert({ 1, 2 });
+ ASSERT_EQ(SortedRangeSet({ { 1, 3 }, { 4, 6 }, { 8, 14 } }), rs);
+ ASSERT_EQ(static_cast<size_t>(10), rs.blocks());
+ rs.Insert({ 3, 5 });
+ ASSERT_EQ(SortedRangeSet({ { 1, 6 }, { 8, 14 } }), rs);
+ ASSERT_EQ(static_cast<size_t>(11), rs.blocks());
+
+ SortedRangeSet r1({ { 20, 22 }, { 15, 18 } });
+ rs.Insert(r1);
+ ASSERT_EQ(SortedRangeSet({ { 1, 6 }, { 8, 14 }, { 15, 18 }, { 20, 22 } }), rs);
+ ASSERT_EQ(static_cast<size_t>(16), rs.blocks());
+
+ SortedRangeSet r2({ { 2, 7 }, { 15, 21 }, { 20, 25 } });
+ rs.Insert(r2);
+ ASSERT_EQ(SortedRangeSet({ { 1, 7 }, { 8, 14 }, { 15, 25 } }), rs);
+ ASSERT_EQ(static_cast<size_t>(22), rs.blocks());
+}
+
+TEST(SortedRangeSetTest, file_range) {
+ SortedRangeSet rs;
+ rs.Insert(4096, 4096);
+ ASSERT_EQ(SortedRangeSet({ { 1, 2 } }), rs);
+ // insert block 2-9
+ rs.Insert(4096 * 3 - 1, 4096 * 7);
+ ASSERT_EQ(SortedRangeSet({ { 1, 10 } }), rs);
+ // insert block 15-19
+ rs.Insert(4096 * 15 + 1, 4096 * 4);
+ ASSERT_EQ(SortedRangeSet({ { 1, 10 }, { 15, 20 } }), rs);
+
+ // rs overlaps block 2-2
+ ASSERT_TRUE(rs.Overlaps(4096 * 2 - 1, 10));
+ ASSERT_FALSE(rs.Overlaps(4096 * 10, 4096 * 5));
+
+ ASSERT_EQ(static_cast<size_t>(10), rs.GetOffsetInRangeSet(4106));
+ ASSERT_EQ(static_cast<size_t>(40970), rs.GetOffsetInRangeSet(4096 * 16 + 10));
+ // block#10 not in range.
+ ASSERT_EXIT(rs.GetOffsetInRangeSet(40970), ::testing::KilledBySignal(SIGABRT), "");
+}
\ No newline at end of file
diff --git a/updater/include/updater/rangeset.h b/updater/include/updater/rangeset.h
index fad0380..b67c987 100644
--- a/updater/include/updater/rangeset.h
+++ b/updater/include/updater/rangeset.h
@@ -24,6 +24,7 @@
#include <android-base/logging.h>
#include <android-base/parseint.h>
+#include <android-base/stringprintf.h>
#include <android-base/strings.h>
using Range = std::pair<size_t, size_t>;
@@ -74,6 +75,18 @@
return RangeSet(std::move(pairs));
}
+ std::string ToString() const {
+ if (ranges_.empty()) {
+ return "";
+ }
+ std::string result = std::to_string(ranges_.size() * 2);
+ for (const auto& r : ranges_) {
+ result += android::base::StringPrintf(",%zu,%zu", r.first, r.second);
+ }
+
+ return result;
+ }
+
// Get the block number for the i-th (starting from 0) block in the RangeSet.
size_t GetBlockNumber(size_t idx) const {
CHECK_LT(idx, blocks_) << "Out of bound index " << idx << " (total blocks: " << blocks_ << ")";
@@ -157,8 +170,109 @@
return ranges_ != other.ranges_;
}
- private:
+ protected:
// Actual limit for each value and the total number are both INT_MAX.
std::vector<Range> ranges_;
size_t blocks_;
};
+
+static constexpr size_t kBlockSize = 4096;
+
+// The class is a sorted version of a RangeSet; and it's useful in imgdiff to split the input
+// files when we're handling large zip files. Specifically, we can treat the input file as a
+// continuous RangeSet (i.e. RangeSet("0-99") for a 100 blocks file); and break it down into
+// several smaller chunks based on the zip entries.
+
+// For example, [source: 0-99] can be split into
+// [split_src1: 10-29]; [split_src2: 40-49, 60-69]; [split_src3: 70-89]
+// Here "10-29" simply means block 10th to block 29th with respect to the original input file.
+// Also, note that the split sources should be mutual exclusive, but they don't need to cover
+// every block in the original source.
+class SortedRangeSet : public RangeSet {
+ public:
+ SortedRangeSet() {}
+
+ // Ranges in the the set should be mutually exclusive; and they're sorted by the start block.
+ explicit SortedRangeSet(std::vector<Range>&& pairs) : RangeSet(std::move(pairs)) {
+ std::sort(ranges_.begin(), ranges_.end());
+ }
+
+ void Insert(const Range& to_insert) {
+ SortedRangeSet rs({ to_insert });
+ Insert(rs);
+ }
+
+ // Insert the input SortedRangeSet; keep the ranges sorted and merge the overlap ranges.
+ void Insert(const SortedRangeSet& rs) {
+ if (rs.size() == 0) {
+ return;
+ }
+ // Merge and sort the two RangeSets.
+ std::vector<Range> temp = std::move(ranges_);
+ std::copy(rs.begin(), rs.end(), std::back_inserter(temp));
+ std::sort(temp.begin(), temp.end());
+
+ Clear();
+ // Trim overlaps and insert the result back to ranges_.
+ Range to_insert = temp.front();
+ for (auto it = temp.cbegin() + 1; it != temp.cend(); it++) {
+ if (it->first <= to_insert.second) {
+ to_insert.second = std::max(to_insert.second, it->second);
+ } else {
+ ranges_.push_back(to_insert);
+ blocks_ += (to_insert.second - to_insert.first);
+ to_insert = *it;
+ }
+ }
+ ranges_.push_back(to_insert);
+ blocks_ += (to_insert.second - to_insert.first);
+ }
+
+ void Clear() {
+ blocks_ = 0;
+ ranges_.clear();
+ }
+
+ using RangeSet::Overlaps;
+ bool Overlaps(size_t start, size_t len) const {
+ RangeSet rs({ { start / kBlockSize, (start + len - 1) / kBlockSize + 1 } });
+ return Overlaps(rs);
+ }
+
+ // Compute the block range the file occupies, and insert that range.
+ void Insert(size_t start, size_t len) {
+ Range to_insert{ start / kBlockSize, (start + len - 1) / kBlockSize + 1 };
+ Insert(to_insert);
+ }
+
+ // Given an offset of the file, checks if the corresponding block (by considering the file as
+ // 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset
+ // within this SortedRangeSet.
+ //
+ // For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte.
+ // The mapped offset within a SortedRangeSet("1-9 15-19") is 10.
+ //
+ // An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th
+ // item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10
+ // + 10) in a range represented by this SortedRangeSet.
+ size_t GetOffsetInRangeSet(size_t old_offset) const {
+ size_t old_block_start = old_offset / kBlockSize;
+ size_t new_block_start = 0;
+ for (const auto& range : ranges_) {
+ // Find the index of old_block_start.
+ if (old_block_start >= range.second) {
+ new_block_start += (range.second - range.first);
+ } else if (old_block_start >= range.first) {
+ new_block_start += (old_block_start - range.first);
+ return (new_block_start * kBlockSize + old_offset % kBlockSize);
+ } else {
+ CHECK(false) <<"block_start " << old_block_start << " is missing between two ranges: "
+ << this->ToString();
+ return 0;
+ }
+ }
+ CHECK(false) <<"block_start " << old_block_start << " exceeds the limit of current RangeSet: "
+ << this->ToString();
+ return 0;
+ }
+};
\ No newline at end of file