Merge "Add implemention of SortedRangeSet" am: 64cba55fbc am: fbd4b10310 am: a09376ed90 am: 8b09001e47
am: fd0e740c61

Change-Id: I1a150ab61fd631252faa774455cd3aca83e8dd4c
diff --git a/tests/unit/rangeset_test.cpp b/tests/unit/rangeset_test.cpp
index 3c6d77e..3993cb9 100644
--- a/tests/unit/rangeset_test.cpp
+++ b/tests/unit/rangeset_test.cpp
@@ -110,3 +110,50 @@
   }
   ASSERT_EQ((std::vector<Range>{ Range{ 8, 10 }, Range{ 1, 5 } }), ranges);
 }
+
+TEST(RangeSetTest, tostring) {  // Parse() followed by ToString() must reproduce the input text.
+  ASSERT_EQ("2,1,6", RangeSet::Parse("2,1,6").ToString());  // one range
+  ASSERT_EQ("4,1,5,8,10", RangeSet::Parse("4,1,5,8,10").ToString());  // two ranges
+  ASSERT_EQ("6,1,3,4,6,15,22", RangeSet::Parse("6,1,3,4,6,15,22").ToString());  // three ranges
+}
+
+TEST(SortedRangeSetTest, insertion) {  // Insert() keeps ranges sorted and merges touching ones.
+  SortedRangeSet rs({ { 2, 3 }, { 4, 6 }, { 8, 14 } });
+  rs.Insert({ 1, 2 });  // touches { 2, 3 } at block 2, so the two fuse into { 1, 3 }
+  ASSERT_EQ(SortedRangeSet({ { 1, 3 }, { 4, 6 }, { 8, 14 } }), rs);
+  ASSERT_EQ(static_cast<size_t>(10), rs.blocks());  // 2 + 2 + 6
+  rs.Insert({ 3, 5 });  // bridges { 1, 3 } and { 4, 6 }
+  ASSERT_EQ(SortedRangeSet({ { 1, 6 }, { 8, 14 } }), rs);
+  ASSERT_EQ(static_cast<size_t>(11), rs.blocks());  // 5 + 6
+
+  SortedRangeSet r1({ { 20, 22 }, { 15, 18 } });  // given out of order; the constructor sorts
+  rs.Insert(r1);
+  ASSERT_EQ(SortedRangeSet({ { 1, 6 }, { 8, 14 }, { 15, 18 }, { 20, 22 } }), rs);
+  ASSERT_EQ(static_cast<size_t>(16), rs.blocks());  // 5 + 6 + 3 + 2
+
+  SortedRangeSet r2({ { 2, 7 }, { 15, 21 }, { 20, 25 } });  // overlaps rs and itself
+  rs.Insert(r2);
+  ASSERT_EQ(SortedRangeSet({ { 1, 7 }, { 8, 14 }, { 15, 25 } }), rs);
+  ASSERT_EQ(static_cast<size_t>(22), rs.blocks());  // 6 + 6 + 10
+}
+
+TEST(SortedRangeSetTest, file_range) {  // byte-offset overloads, using 4096-byte blocks
+  SortedRangeSet rs;
+  rs.Insert(4096, 4096);  // bytes [4096, 8192) occupy exactly block 1
+  ASSERT_EQ(SortedRangeSet({ { 1, 2 } }), rs);
+  // insert block 2-9; the unaligned start (one byte before block 3) pulls in block 2
+  rs.Insert(4096 * 3 - 1, 4096 * 7);
+  ASSERT_EQ(SortedRangeSet({ { 1, 10 } }), rs);
+  // insert block 15-19; the unaligned start makes the 4-block length spill into block 19
+  rs.Insert(4096 * 15 + 1, 4096 * 4);
+  ASSERT_EQ(SortedRangeSet({ { 1, 10 }, { 15, 20 } }), rs);
+
+  // the queried bytes [8191, 8201) span blocks 1-2, both of which are in rs
+  ASSERT_TRUE(rs.Overlaps(4096 * 2 - 1, 10));
+  ASSERT_FALSE(rs.Overlaps(4096 * 10, 4096 * 5));  // blocks 10-14 fall in the gap
+
+  ASSERT_EQ(static_cast<size_t>(10), rs.GetOffsetInRangeSet(4106));  // block 1 is index 0
+  ASSERT_EQ(static_cast<size_t>(40970), rs.GetOffsetInRangeSet(4096 * 16 + 10));  // index 10
+  // block#10 not in range.
+  ASSERT_EXIT(rs.GetOffsetInRangeSet(40970), ::testing::KilledBySignal(SIGABRT), "");
+}
\ No newline at end of file
diff --git a/updater/include/updater/rangeset.h b/updater/include/updater/rangeset.h
index fad0380..b67c987 100644
--- a/updater/include/updater/rangeset.h
+++ b/updater/include/updater/rangeset.h
@@ -24,6 +24,7 @@
 
 #include <android-base/logging.h>
 #include <android-base/parseint.h>
+#include <android-base/stringprintf.h>
 #include <android-base/strings.h>
 
 using Range = std::pair<size_t, size_t>;
@@ -74,6 +75,18 @@
     return RangeSet(std::move(pairs));
   }
 
+  std::string ToString() const {  // Serializes as "<2 * #ranges>,<first>,<second>,...".
+    if (ranges_.empty()) {
+      return "";  // An empty set yields an empty string rather than "0".
+    }
+    std::string result = std::to_string(ranges_.size() * 2);  // count of the numbers that follow
+    for (const auto& r : ranges_) {
+      result += android::base::StringPrintf(",%zu,%zu", r.first, r.second);
+    }
+
+    return result;
+  }
+
   // Get the block number for the i-th (starting from 0) block in the RangeSet.
   size_t GetBlockNumber(size_t idx) const {
     CHECK_LT(idx, blocks_) << "Out of bound index " << idx << " (total blocks: " << blocks_ << ")";
@@ -157,8 +170,109 @@
     return ranges_ != other.ranges_;
   }
 
- private:
+ protected:
   // Actual limit for each value and the total number are both INT_MAX.
   std::vector<Range> ranges_;
   size_t blocks_;
 };
+
+static constexpr size_t kBlockSize = 4096;  // Bytes per block; maps byte offsets to block numbers.
+
+// The class is a sorted version of a RangeSet; and it's useful in imgdiff to split the input
+// files when we're handling large zip files. Specifically, we can treat the input file as a
+// continuous RangeSet (i.e. RangeSet("0-99") for a 100 blocks file); and break it down into
+// several smaller chunks based on the zip entries.
+
+// For example, [source: 0-99] can be split into
+// [split_src1: 10-29]; [split_src2: 40-49, 60-69]; [split_src3: 70-89]
+// Here "10-29" simply means the 10th through the 29th block of the original input file.
+// Also, note that the split sources should be mutually exclusive, but they don't need to cover
+// every block in the original source.
+class SortedRangeSet : public RangeSet {
+ public:
+  SortedRangeSet() {}
+
+  // Ranges in the set should be mutually exclusive; the constructor sorts them by start block.
+  explicit SortedRangeSet(std::vector<Range>&& pairs) : RangeSet(std::move(pairs)) {
+    std::sort(ranges_.begin(), ranges_.end());
+  }
+
+  void Insert(const Range& to_insert) {
+    SortedRangeSet rs({ to_insert });  // Wrap the range so the set-based overload does the merging.
+    Insert(rs);
+  }
+
+  // Inserts every range of |rs|, keeping ranges sorted and merging those that overlap or touch.
+  void Insert(const SortedRangeSet& rs) {
+    if (rs.size() == 0) {
+      return;
+    }
+    // Merge and sort the two RangeSets.
+    std::vector<Range> temp = std::move(ranges_);
+    std::copy(rs.begin(), rs.end(), std::back_inserter(temp));
+    std::sort(temp.begin(), temp.end());
+
+    Clear();  // ranges_ was moved from above; reset it and blocks_ before rebuilding both.
+    // Trim overlaps and insert the result back to ranges_.
+    Range to_insert = temp.front();
+    for (auto it = temp.cbegin() + 1; it != temp.cend(); it++) {
+      if (it->first <= to_insert.second) {  // Overlapping or adjacent: extend the pending range.
+        to_insert.second = std::max(to_insert.second, it->second);
+      } else {
+        ranges_.push_back(to_insert);
+        blocks_ += (to_insert.second - to_insert.first);
+        to_insert = *it;
+      }
+    }
+    ranges_.push_back(to_insert);  // Flush the last pending range.
+    blocks_ += (to_insert.second - to_insert.first);
+  }
+
+  void Clear() {
+    blocks_ = 0;
+    ranges_.clear();
+  }
+
+  using RangeSet::Overlaps;  // Keep the RangeSet overload visible next to the byte-based one.
+  bool Overlaps(size_t start, size_t len) const {  // |start|, |len| in bytes; assumes len > 0.
+    RangeSet rs({ { start / kBlockSize, (start + len - 1) / kBlockSize + 1 } });
+    return Overlaps(rs);
+  }
+
+  // Inserts the block range covering the |len| bytes at byte offset |start| (assumes len > 0).
+  void Insert(size_t start, size_t len) {
+    Range to_insert{ start / kBlockSize, (start + len - 1) / kBlockSize + 1 };
+    Insert(to_insert);
+  }
+
+  // Given an offset of the file, checks if the corresponding block (by considering the file as
+  // 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset
+  // within this SortedRangeSet. Otherwise a CHECK fails.
+  //
+  // For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte.
+  // The mapped offset within a SortedRangeSet("1-9 15-19") is 10.
+  //
+  // An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th
+  // item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10
+  // + 10) in a range represented by this SortedRangeSet.
+  size_t GetOffsetInRangeSet(size_t old_offset) const {
+    size_t old_block_start = old_offset / kBlockSize;
+    size_t new_block_start = 0;
+    for (const auto& range : ranges_) {
+      // Find the index of old_block_start.
+      if (old_block_start >= range.second) {  // Range ends before the block: count all of it.
+        new_block_start += (range.second - range.first);
+      } else if (old_block_start >= range.first) {  // Block lies inside this range.
+        new_block_start += (old_block_start - range.first);
+        return (new_block_start * kBlockSize + old_offset % kBlockSize);
+      } else {  // Block precedes this range, i.e. it sits in a gap.
+        CHECK(false) <<"block_start " << old_block_start << " is missing between two ranges: "
+                     << this->ToString();
+        return 0;
+      }
+    }
+    CHECK(false) <<"block_start " << old_block_start << " exceeds the limit of current RangeSet: "
+                 << this->ToString();
+    return 0;
+  }
+};
\ No newline at end of file