Blame - applypatch/imgdiff.c - android_bootable_recovery

blob: 6b9ebee5c5e577c075c66c946dbf04f370e675a0 [file] [log] [blame]

Doug Zongker	512536a	2010-02-17 16:11:44 -0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2009 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	/*
				18	* This program constructs binary patches for images -- such as boot.img
				19	* and recovery.img -- that consist primarily of large chunks of gzipped
				20	* data interspersed with uncompressed data. Doing a naive bsdiff of
				21	* these files is not useful because small changes in the data lead to
				22	* large changes in the compressed bitstream; bsdiff patches of gzipped
				23	* data are typically as large as the data itself.
				24	*
				25	* To patch these usefully, we break the source and target images up into
				26	* chunks of two types: "normal" and "gzip". Normal chunks are simply
				27	* patched using a plain bsdiff. Gzip chunks are first expanded, then a
				28	* bsdiff is applied to the uncompressed data, then the patched data is
				29	* gzipped using the same encoder parameters. Patched chunks are
				30	* concatenated together to create the output file; the output image
				31	* should be exactly the same series of bytes as the target image used
				32	* originally to generate the patch.
				33	*
				34	* To work well with this tool, the gzipped sections of the target
				35	* image must have been generated using the same deflate encoder that
				36	* is available in applypatch, namely, the one in the zlib library.
				37	* In practice this means that images should be compressed using the
				38	* "minigzip" tool included in the zlib distribution, not the GNU gzip
				39	* program.
				40	*
				41	* An "imgdiff" patch consists of a header describing the chunk structure
				42	* of the file and any encoding parameters needed for the gzipped
				43	* chunks, followed by N bsdiff patches, one per chunk.
				44	*
				45	* For a diff to be generated, the source and target images must have the
				46	* same "chunk" structure: that is, the same number of gzipped and normal
				47	* chunks in the same order. Android boot and recovery images currently
				48	* consist of five chunks: a small normal header, a gzipped kernel, a
				49	* small normal section, a gzipped ramdisk, and finally a small normal
				50	* footer.
				51	*
				52	* Caveats: we locate gzipped sections within the source and target
				53	* images by searching for the byte sequence 1f8b0800: 1f8b is the gzip
				54	* magic number; 08 specifies the "deflate" encoding [the only encoding
				55	* supported by the gzip standard]; and 00 is the flags byte. We do not
				56	* currently support any extra header fields (which would be indicated by
				57	* a nonzero flags byte). We also don't handle the case when that byte
				58	* sequence appears spuriously in the file. (Note that it would have to
				59	* occur spuriously within a normal chunk to be a problem.)
				60	*
				61	*
				62	* The imgdiff patch header looks like this:
				63	*
				64	* "IMGDIFF1" (8) [magic number and version]
				65	* chunk count (4)
				66	* for each chunk:
				67	* chunk type (4) [CHUNK_{NORMAL, GZIP, DEFLATE, RAW}]
				68	* if chunk type == CHUNK_NORMAL:
				69	* source start (8)
				70	* source len (8)
				71	* bsdiff patch offset (8) [from start of patch file]
				72	* if chunk type == CHUNK_GZIP: (version 1 only)
				73	* source start (8)
				74	* source len (8)
				75	* bsdiff patch offset (8) [from start of patch file]
				76	* source expanded len (8) [size of uncompressed source]
				77	* target expected len (8) [size of uncompressed target]
				78	* gzip level (4)
				79	* method (4)
				80	* windowBits (4)
				81	* memLevel (4)
				82	* strategy (4)
				83	* gzip header len (4)
				84	* gzip header (gzip header len)
				85	* gzip footer (8)
				86	* if chunk type == CHUNK_DEFLATE: (version 2 only)
				87	* source start (8)
				88	* source len (8)
				89	* bsdiff patch offset (8) [from start of patch file]
				90	* source expanded len (8) [size of uncompressed source]
				91	* target expected len (8) [size of uncompressed target]
				92	* gzip level (4)
				93	* method (4)
				94	* windowBits (4)
				95	* memLevel (4)
				96	* strategy (4)
				97	* if chunk type == RAW: (version 2 only)
				98	* target len (4)
				99	* data (target len)
				100	*
				101	* All integers are little-endian. "source start" and "source len"
				102	* specify the section of the input image that comprises this chunk,
				103	* including the gzip header and footer for gzip chunks. "source
				104	* expanded len" is the size of the uncompressed source data. "target
				105	* expected len" is the size of the uncompressed data after applying
				106	* the bsdiff patch. The next five parameters specify the zlib
				107	* parameters to be used when compressing the patched data, and the
				108	* next three specify the header and footer to be wrapped around the
				109	* compressed data to create the output chunk (so that header contents
				110	* like the timestamp are recreated exactly).
				111	*
				112	* After the header there are 'chunk count' bsdiff patches; the offset
				113	* of each from the beginning of the file is specified in the header.
				114	*/
				115
				116	#include <errno.h>
				117	#include <stdio.h>
				118	#include <stdlib.h>
				119	#include <string.h>
				120	#include <sys/stat.h>
				121	#include <unistd.h>
				122	#include <sys/types.h>
				123
				124	#include "zlib.h"
				125	#include "imgdiff.h"
				126	#include "utils.h"
				127
				128	typedef struct {
				129	int type; // CHUNK_NORMAL, CHUNK_DEFLATE
				130	size_t start; // offset of chunk in original image file
				131
				132	size_t len;
				133	unsigned char* data; // data to be patched (uncompressed, for deflate chunks)
				134
				135	size_t source_start;
				136	size_t source_len;
				137
				138	off_t* I; // used by bsdiff
				139
				140	// --- for CHUNK_DEFLATE chunks only: ---
				141
				142	// original (compressed) deflate data
				143	size_t deflate_len;
				144	unsigned char* deflate_data;
				145
				146	char* filename; // used for zip entries
				147
				148	// deflate encoder parameters
				149	int level, method, windowBits, memLevel, strategy;
				150
				151	size_t source_uncompressed_len;
				152	} ImageChunk;
				153
				154	typedef struct {
				155	int data_offset;
				156	int deflate_len;
				157	int uncomp_len;
				158	char* filename;
				159	} ZipFileEntry;
				160
				161	static int fileentry_compare(const void* a, const void* b) {
				162	int ao = ((ZipFileEntry*)a)->data_offset;
				163	int bo = ((ZipFileEntry*)b)->data_offset;
				164	if (ao < bo) {
				165	return -1;
				166	} else if (ao > bo) {
				167	return 1;
				168	} else {
				169	return 0;
				170	}
				171	}
				172
				173	// from bsdiff.c
				174	int bsdiff(u_char* old, off_t oldsize, off_t** IP, u_char* new, off_t newsize,
				175	const char* patch_filename);
				176
				177	unsigned char* ReadZip(const char* filename,
				178	int* num_chunks, ImageChunk** chunks,
				179	int include_pseudo_chunk) {
				180	struct stat st;
				181	if (stat(filename, &st) != 0) {
				182	printf("failed to stat \"%s\": %s\n", filename, strerror(errno));
				183	return NULL;
				184	}
				185
				186	unsigned char* img = malloc(st.st_size);
				187	FILE* f = fopen(filename, "rb");
				188	if (fread(img, 1, st.st_size, f) != st.st_size) {
				189	printf("failed to read \"%s\" %s\n", filename, strerror(errno));
				190	fclose(f);
				191	return NULL;
				192	}
				193	fclose(f);
				194
				195	// look for the end-of-central-directory record.
				196
				197	int i;
				198	for (i = st.st_size-20; i >= 0 && i > st.st_size - 65600; --i) {
				199	if (img[i] == 0x50 && img[i+1] == 0x4b &&
				200	img[i+2] == 0x05 && img[i+3] == 0x06) {
				201	break;
				202	}
				203	}
				204	// double-check: this archive consists of a single "disk"
				205	if (!(img[i+4] == 0 && img[i+5] == 0 && img[i+6] == 0 && img[i+7] == 0)) {
				206	printf("can't process multi-disk archive\n");
				207	return NULL;
				208	}
				209
				210	int cdcount = Read2(img+i+8);
				211	int cdoffset = Read4(img+i+16);
				212
				213	ZipFileEntry* temp_entries = malloc(cdcount * sizeof(ZipFileEntry));
				214	int entrycount = 0;
				215
				216	unsigned char* cd = img+cdoffset;
				217	for (i = 0; i < cdcount; ++i) {
				218	if (!(cd[0] == 0x50 && cd[1] == 0x4b && cd[2] == 0x01 && cd[3] == 0x02)) {
				219	printf("bad central directory entry %d\n", i);
				220	return NULL;
				221	}
				222
				223	int clen = Read4(cd+20); // compressed len
				224	int ulen = Read4(cd+24); // uncompressed len
				225	int nlen = Read2(cd+28); // filename len
				226	int xlen = Read2(cd+30); // extra field len
				227	int mlen = Read2(cd+32); // file comment len
				228	int hoffset = Read4(cd+42); // local header offset
				229
				230	char* filename = malloc(nlen+1);
				231	memcpy(filename, cd+46, nlen);
				232	filename[nlen] = '\0';
				233
				234	int method = Read2(cd+10);
				235
				236	cd += 46 + nlen + xlen + mlen;
				237
				238	if (method != 8) { // 8 == deflate
				239	free(filename);
				240	continue;
				241	}
				242
				243	unsigned char* lh = img + hoffset;
				244
				245	if (!(lh[0] == 0x50 && lh[1] == 0x4b && lh[2] == 0x03 && lh[3] == 0x04)) {
				246	printf("bad local file header entry %d\n", i);
				247	return NULL;
				248	}
				249
				250	if (Read2(lh+26) != nlen \|\| memcmp(lh+30, filename, nlen) != 0) {
				251	printf("central dir filename doesn't match local header\n");
				252	return NULL;
				253	}
				254
				255	xlen = Read2(lh+28); // extra field len; might be different from CD entry?
				256
				257	temp_entries[entrycount].data_offset = hoffset+30+nlen+xlen;
				258	temp_entries[entrycount].deflate_len = clen;
				259	temp_entries[entrycount].uncomp_len = ulen;
				260	temp_entries[entrycount].filename = filename;
				261	++entrycount;
				262	}
				263
				264	qsort(temp_entries, entrycount, sizeof(ZipFileEntry), fileentry_compare);
				265
				266	#if 0
				267	printf("found %d deflated entries\n", entrycount);
				268	for (i = 0; i < entrycount; ++i) {
				269	printf("off %10d len %10d unlen %10d %p %s\n",
				270	temp_entries[i].data_offset,
				271	temp_entries[i].deflate_len,
				272	temp_entries[i].uncomp_len,
				273	temp_entries[i].filename,
				274	temp_entries[i].filename);
				275	}
				276	#endif
				277
				278	*num_chunks = 0;
				279	chunks = malloc((entrycount2+2) * sizeof(ImageChunk));
				280	ImageChunk* curr = *chunks;
				281
				282	if (include_pseudo_chunk) {
				283	curr->type = CHUNK_NORMAL;
				284	curr->start = 0;
				285	curr->len = st.st_size;
				286	curr->data = img;
				287	curr->filename = NULL;
				288	curr->I = NULL;
				289	++curr;
				290	++*num_chunks;
				291	}
				292
				293	int pos = 0;
				294	int nextentry = 0;
				295
				296	while (pos < st.st_size) {
				297	if (nextentry < entrycount && pos == temp_entries[nextentry].data_offset) {
				298	curr->type = CHUNK_DEFLATE;
				299	curr->start = pos;
				300	curr->deflate_len = temp_entries[nextentry].deflate_len;
				301	curr->deflate_data = img + pos;
				302	curr->filename = temp_entries[nextentry].filename;
				303	curr->I = NULL;
				304
				305	curr->len = temp_entries[nextentry].uncomp_len;
				306	curr->data = malloc(curr->len);
				307
				308	z_stream strm;
				309	strm.zalloc = Z_NULL;
				310	strm.zfree = Z_NULL;
				311	strm.opaque = Z_NULL;
				312	strm.avail_in = curr->deflate_len;
				313	strm.next_in = curr->deflate_data;
				314
				315	// -15 means we are decoding a 'raw' deflate stream; zlib will
				316	// not expect zlib headers.
				317	int ret = inflateInit2(&strm, -15);
				318
				319	strm.avail_out = curr->len;
				320	strm.next_out = curr->data;
				321	ret = inflate(&strm, Z_NO_FLUSH);
				322	if (ret != Z_STREAM_END) {
				323	printf("failed to inflate \"%s\"; %d\n", curr->filename, ret);
				324	return NULL;
				325	}
				326
				327	inflateEnd(&strm);
				328
				329	pos += curr->deflate_len;
				330	++nextentry;
				331	++*num_chunks;
				332	++curr;
				333	continue;
				334	}
				335
				336	// use a normal chunk to take all the data up to the start of the
				337	// next deflate section.
				338
				339	curr->type = CHUNK_NORMAL;
				340	curr->start = pos;
				341	if (nextentry < entrycount) {
				342	curr->len = temp_entries[nextentry].data_offset - pos;
				343	} else {
				344	curr->len = st.st_size - pos;
				345	}
				346	curr->data = img + pos;
				347	curr->filename = NULL;
				348	curr->I = NULL;
				349	pos += curr->len;
				350
				351	++*num_chunks;
				352	++curr;
				353	}
				354
				355	free(temp_entries);
				356	return img;
				357	}
				358
				359	/*
				360	* Read the given file and break it up into chunks, putting the number
				361	* of chunks and their info in num_chunks and *chunks,
				362	* respectively. Returns a malloc'd block of memory containing the
				363	* contents of the file; various pointers in the output chunk array
				364	* will point into this block of memory. The caller should free the
				365	* return value when done with all the chunks. Returns NULL on
				366	* failure.
				367	*/
				368	unsigned char* ReadImage(const char* filename,
				369	int* num_chunks, ImageChunk** chunks) {
				370	struct stat st;
				371	if (stat(filename, &st) != 0) {
				372	printf("failed to stat \"%s\": %s\n", filename, strerror(errno));
				373	return NULL;
				374	}
				375
				376	unsigned char* img = malloc(st.st_size + 4);
				377	FILE* f = fopen(filename, "rb");
				378	if (fread(img, 1, st.st_size, f) != st.st_size) {
				379	printf("failed to read \"%s\" %s\n", filename, strerror(errno));
				380	fclose(f);
				381	return NULL;
				382	}
				383	fclose(f);
				384
				385	// append 4 zero bytes to the data so we can always search for the
				386	// four-byte string 1f8b0800 starting at any point in the actual
				387	// file data, without special-casing the end of the data.
				388	memset(img+st.st_size, 0, 4);
				389
				390	size_t pos = 0;
				391
				392	*num_chunks = 0;
				393	*chunks = NULL;
				394
				395	while (pos < st.st_size) {
				396	unsigned char* p = img+pos;
				397
				398	if (st.st_size - pos >= 4 &&
				399	p[0] == 0x1f && p[1] == 0x8b &&
				400	p[2] == 0x08 && // deflate compression
				401	p[3] == 0x00) { // no header flags
				402	// 'pos' is the offset of the start of a gzip chunk.
				403
				404	*num_chunks += 3;
				405	chunks = realloc(chunks, num_chunks sizeof(ImageChunk));
				406	ImageChunk* curr = chunks + (num_chunks-3);
				407
				408	// create a normal chunk for the header.
				409	curr->start = pos;
				410	curr->type = CHUNK_NORMAL;
				411	curr->len = GZIP_HEADER_LEN;
				412	curr->data = p;
				413	curr->I = NULL;
				414
				415	pos += curr->len;
				416	p += curr->len;
				417	++curr;
				418
				419	curr->type = CHUNK_DEFLATE;
				420	curr->filename = NULL;
				421	curr->I = NULL;
				422
				423	// We must decompress this chunk in order to discover where it
				424	// ends, and so we can put the uncompressed data and its length
				425	// into curr->data and curr->len.
				426
				427	size_t allocated = 32768;
				428	curr->len = 0;
				429	curr->data = malloc(allocated);
				430	curr->start = pos;
				431	curr->deflate_data = p;
				432
				433	z_stream strm;
				434	strm.zalloc = Z_NULL;
				435	strm.zfree = Z_NULL;
				436	strm.opaque = Z_NULL;
				437	strm.avail_in = st.st_size - pos;
				438	strm.next_in = p;
				439
				440	// -15 means we are decoding a 'raw' deflate stream; zlib will
				441	// not expect zlib headers.
				442	int ret = inflateInit2(&strm, -15);
				443
				444	do {
				445	strm.avail_out = allocated - curr->len;
				446	strm.next_out = curr->data + curr->len;
				447	ret = inflate(&strm, Z_NO_FLUSH);
				448	curr->len = allocated - strm.avail_out;
				449	if (strm.avail_out == 0) {
				450	allocated *= 2;
				451	curr->data = realloc(curr->data, allocated);
				452	}
				453	} while (ret != Z_STREAM_END);
				454
				455	curr->deflate_len = st.st_size - strm.avail_in - pos;
				456	inflateEnd(&strm);
				457	pos += curr->deflate_len;
				458	p += curr->deflate_len;
				459	++curr;
				460
				461	// create a normal chunk for the footer
				462
				463	curr->type = CHUNK_NORMAL;
				464	curr->start = pos;
				465	curr->len = GZIP_FOOTER_LEN;
				466	curr->data = img+pos;
				467	curr->I = NULL;
				468
				469	pos += curr->len;
				470	p += curr->len;
				471	++curr;
				472
				473	// The footer (that we just skipped over) contains the size of
				474	// the uncompressed data. Double-check to make sure that it
				475	// matches the size of the data we got when we actually did
				476	// the decompression.
				477	size_t footer_size = Read4(p-4);
				478	if (footer_size != curr[-2].len) {
				479	printf("Error: footer size %d != decompressed size %d\n",
				480	footer_size, curr[-2].len);
				481	free(img);
				482	return NULL;
				483	}
				484	} else {
				485	// Reallocate the list for every chunk; we expect the number of
				486	// chunks to be small (5 for typical boot and recovery images).
				487	++*num_chunks;
				488	chunks = realloc(chunks, num_chunks sizeof(ImageChunk));
				489	ImageChunk* curr = chunks + (num_chunks-1);
				490	curr->start = pos;
				491	curr->I = NULL;
				492
				493	// 'pos' is not the offset of the start of a gzip chunk, so scan
				494	// forward until we find a gzip header.
				495	curr->type = CHUNK_NORMAL;
				496	curr->data = p;
				497
				498	for (curr->len = 0; curr->len < (st.st_size - pos); ++curr->len) {
				499	if (p[curr->len] == 0x1f &&
				500	p[curr->len+1] == 0x8b &&
				501	p[curr->len+2] == 0x08 &&
				502	p[curr->len+3] == 0x00) {
				503	break;
				504	}
				505	}
				506	pos += curr->len;
				507	}
				508	}
				509
				510	return img;
				511	}
				512
				513	#define BUFFER_SIZE 32768
				514
				515	/*
				516	* Takes the uncompressed data stored in the chunk, compresses it
				517	* using the zlib parameters stored in the chunk, and checks that it
				518	* matches exactly the compressed data we started with (also stored in
				519	* the chunk). Return 0 on success.
				520	*/
				521	int TryReconstruction(ImageChunk* chunk, unsigned char* out) {
				522	size_t p = 0;
				523
				524	#if 0
				525	printf("trying %d %d %d %d %d\n",
				526	chunk->level, chunk->method, chunk->windowBits,
				527	chunk->memLevel, chunk->strategy);
				528	#endif
				529
				530	z_stream strm;
				531	strm.zalloc = Z_NULL;
				532	strm.zfree = Z_NULL;
				533	strm.opaque = Z_NULL;
				534	strm.avail_in = chunk->len;
				535	strm.next_in = chunk->data;
				536	int ret;
				537	ret = deflateInit2(&strm, chunk->level, chunk->method, chunk->windowBits,
				538	chunk->memLevel, chunk->strategy);
				539	do {
				540	strm.avail_out = BUFFER_SIZE;
				541	strm.next_out = out;
				542	ret = deflate(&strm, Z_FINISH);
				543	size_t have = BUFFER_SIZE - strm.avail_out;
				544
				545	if (memcmp(out, chunk->deflate_data+p, have) != 0) {
				546	// mismatch; data isn't the same.
				547	deflateEnd(&strm);
				548	return -1;
				549	}
				550	p += have;
				551	} while (ret != Z_STREAM_END);
				552	deflateEnd(&strm);
				553	if (p != chunk->deflate_len) {
				554	// mismatch; ran out of data before we should have.
				555	return -1;
				556	}
				557	return 0;
				558	}
				559
				560	/*
				561	* Verify that we can reproduce exactly the same compressed data that
				562	* we started with. Sets the level, method, windowBits, memLevel, and
				563	* strategy fields in the chunk to the encoding parameters needed to
				564	* produce the right output. Returns 0 on success.
				565	*/
				566	int ReconstructDeflateChunk(ImageChunk* chunk) {
				567	if (chunk->type != CHUNK_DEFLATE) {
				568	printf("attempt to reconstruct non-deflate chunk\n");
				569	return -1;
				570	}
				571
				572	size_t p = 0;
				573	unsigned char* out = malloc(BUFFER_SIZE);
				574
				575	// We only check two combinations of encoder parameters: level 6
				576	// (the default) and level 9 (the maximum).
				577	for (chunk->level = 6; chunk->level <= 9; chunk->level += 3) {
				578	chunk->windowBits = -15; // 32kb window; negative to indicate a raw stream.
				579	chunk->memLevel = 8; // the default value.
				580	chunk->method = Z_DEFLATED;
				581	chunk->strategy = Z_DEFAULT_STRATEGY;
				582
				583	if (TryReconstruction(chunk, out) == 0) {
				584	free(out);
				585	return 0;
				586	}
				587	}
				588
				589	free(out);
				590	return -1;
				591	}
				592
				593	/*
				594	* Given source and target chunks, compute a bsdiff patch between them
				595	* by running bsdiff in a subprocess. Return the patch data, placing
				596	* its length in *size. Return NULL on failure. We expect the bsdiff
				597	* program to be in the path.
				598	*/
				599	unsigned char* MakePatch(ImageChunk* src, ImageChunk* tgt, size_t* size) {
				600	if (tgt->type == CHUNK_NORMAL) {
				601	if (tgt->len <= 160) {
				602	tgt->type = CHUNK_RAW;
				603	*size = tgt->len;
				604	return tgt->data;
				605	}
				606	}
				607
				608	char ptemp[] = "/tmp/imgdiff-patch-XXXXXX";
				609	mkstemp(ptemp);
				610
				611	int r = bsdiff(src->data, src->len, &(src->I), tgt->data, tgt->len, ptemp);
				612	if (r != 0) {
				613	printf("bsdiff() failed: %d\n", r);
				614	return NULL;
				615	}
				616
				617	struct stat st;
				618	if (stat(ptemp, &st) != 0) {
				619	printf("failed to stat patch file %s: %s\n",
				620	ptemp, strerror(errno));
				621	return NULL;
				622	}
				623
				624	unsigned char* data = malloc(st.st_size);
				625
				626	if (tgt->type == CHUNK_NORMAL && tgt->len <= st.st_size) {
				627	unlink(ptemp);
				628
				629	tgt->type = CHUNK_RAW;
				630	*size = tgt->len;
				631	return tgt->data;
				632	}
				633
				634	*size = st.st_size;
				635
				636	FILE* f = fopen(ptemp, "rb");
				637	if (f == NULL) {
				638	printf("failed to open patch %s: %s\n", ptemp, strerror(errno));
				639	return NULL;
				640	}
				641	if (fread(data, 1, st.st_size, f) != st.st_size) {
				642	printf("failed to read patch %s: %s\n", ptemp, strerror(errno));
				643	return NULL;
				644	}
				645	fclose(f);
				646
				647	unlink(ptemp);
				648
				649	tgt->source_start = src->start;
				650	switch (tgt->type) {
				651	case CHUNK_NORMAL:
				652	tgt->source_len = src->len;
				653	break;
				654	case CHUNK_DEFLATE:
				655	tgt->source_len = src->deflate_len;
				656	tgt->source_uncompressed_len = src->len;
				657	break;
				658	}
				659
				660	return data;
				661	}
				662
				663	/*
				664	* Cause a gzip chunk to be treated as a normal chunk (ie, as a blob
				665	* of uninterpreted data). The resulting patch will likely be about
				666	* as big as the target file, but it lets us handle the case of images
				667	* where some gzip chunks are reconstructible but others aren't (by
				668	* treating the ones that aren't as normal chunks).
				669	*/
				670	void ChangeDeflateChunkToNormal(ImageChunk* ch) {
				671	if (ch->type != CHUNK_DEFLATE) return;
				672	ch->type = CHUNK_NORMAL;
				673	free(ch->data);
				674	ch->data = ch->deflate_data;
				675	ch->len = ch->deflate_len;
				676	}
				677
				678	/*
				679	* Return true if the data in the chunk is identical (including the
				680	* compressed representation, for gzip chunks).
				681	*/
				682	int AreChunksEqual(ImageChunk* a, ImageChunk* b) {
				683	if (a->type != b->type) return 0;
				684
				685	switch (a->type) {
				686	case CHUNK_NORMAL:
				687	return a->len == b->len && memcmp(a->data, b->data, a->len) == 0;
				688
				689	case CHUNK_DEFLATE:
				690	return a->deflate_len == b->deflate_len &&
				691	memcmp(a->deflate_data, b->deflate_data, a->deflate_len) == 0;
				692
				693	default:
				694	printf("unknown chunk type %d\n", a->type);
				695	return 0;
				696	}
				697	}
				698
				699	/*
				700	* Look for runs of adjacent normal chunks and compress them down into
				701	* a single chunk. (Such runs can be produced when deflate chunks are
				702	* changed to normal chunks.)
				703	*/
				704	void MergeAdjacentNormalChunks(ImageChunk* chunks, int* num_chunks) {
				705	int out = 0;
				706	int in_start = 0, in_end;
				707	while (in_start < *num_chunks) {
				708	if (chunks[in_start].type != CHUNK_NORMAL) {
				709	in_end = in_start+1;
				710	} else {
				711	// in_start is a normal chunk. Look for a run of normal chunks
				712	// that constitute a solid block of data (ie, each chunk begins
				713	// where the previous one ended).
				714	for (in_end = in_start+1;
				715	in_end < *num_chunks && chunks[in_end].type == CHUNK_NORMAL &&
				716	(chunks[in_end].start ==
				717	chunks[in_end-1].start + chunks[in_end-1].len &&
				718	chunks[in_end].data ==
				719	chunks[in_end-1].data + chunks[in_end-1].len);
				720	++in_end);
				721	}
				722
				723	if (in_end == in_start+1) {
				724	#if 0
				725	printf("chunk %d is now %d\n", in_start, out);
				726	#endif
				727	if (out != in_start) {
				728	memcpy(chunks+out, chunks+in_start, sizeof(ImageChunk));
				729	}
				730	} else {
				731	#if 0
				732	printf("collapse normal chunks %d-%d into %d\n", in_start, in_end-1, out);
				733	#endif
				734
				735	// Merge chunks [in_start, in_end-1] into one chunk. Since the
				736	// data member of each chunk is just a pointer into an in-memory
				737	// copy of the file, this can be done without recopying (the
				738	// output chunk has the first chunk's start location and data
				739	// pointer, and length equal to the sum of the input chunk
				740	// lengths).
				741	chunks[out].type = CHUNK_NORMAL;
				742	chunks[out].start = chunks[in_start].start;
				743	chunks[out].data = chunks[in_start].data;
				744	chunks[out].len = chunks[in_end-1].len +
				745	(chunks[in_end-1].start - chunks[in_start].start);
				746	}
				747
				748	++out;
				749	in_start = in_end;
				750	}
				751	*num_chunks = out;
				752	}
				753
				754	ImageChunk* FindChunkByName(const char* name,
				755	ImageChunk* chunks, int num_chunks) {
				756	int i;
				757	for (i = 0; i < num_chunks; ++i) {
				758	if (chunks[i].type == CHUNK_DEFLATE && chunks[i].filename &&
				759	strcmp(name, chunks[i].filename) == 0) {
				760	return chunks+i;
				761	}
				762	}
				763	return NULL;
				764	}
				765
				766	void DumpChunks(ImageChunk* chunks, int num_chunks) {
				767	int i;
				768	for (i = 0; i < num_chunks; ++i) {
				769	printf("chunk %d: type %d start %d len %d\n",
				770	i, chunks[i].type, chunks[i].start, chunks[i].len);
				771	}
				772	}
				773
				774	int main(int argc, char** argv) {
				775	if (argc != 4 && argc != 5) {
				776	usage:
				777	printf("usage: %s [-z] <src-img> <tgt-img> <patch-file>\n",
				778	argv[0]);
				779	return 2;
				780	}
				781
				782	int zip_mode = 0;
				783
				784	if (strcmp(argv[1], "-z") == 0) {
				785	zip_mode = 1;
				786	--argc;
				787	++argv;
				788	}
				789
				790
				791	int num_src_chunks;
				792	ImageChunk* src_chunks;
				793	int num_tgt_chunks;
				794	ImageChunk* tgt_chunks;
				795	int i;
				796
				797	if (zip_mode) {
				798	if (ReadZip(argv[1], &num_src_chunks, &src_chunks, 1) == NULL) {
				799	printf("failed to break apart source zip file\n");
				800	return 1;
				801	}
				802	if (ReadZip(argv[2], &num_tgt_chunks, &tgt_chunks, 0) == NULL) {
				803	printf("failed to break apart target zip file\n");
				804	return 1;
				805	}
				806	} else {
				807	if (ReadImage(argv[1], &num_src_chunks, &src_chunks) == NULL) {
				808	printf("failed to break apart source image\n");
				809	return 1;
				810	}
				811	if (ReadImage(argv[2], &num_tgt_chunks, &tgt_chunks) == NULL) {
				812	printf("failed to break apart target image\n");
				813	return 1;
				814	}
				815
				816	// Verify that the source and target images have the same chunk
				817	// structure (ie, the same sequence of deflate and normal chunks).
				818
				819	if (!zip_mode) {
				820	// Merge the gzip header and footer in with any adjacent
				821	// normal chunks.
				822	MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks);
				823	MergeAdjacentNormalChunks(src_chunks, &num_src_chunks);
				824	}
				825
				826	if (num_src_chunks != num_tgt_chunks) {
				827	printf("source and target don't have same number of chunks!\n");
				828	printf("source chunks:\n");
				829	DumpChunks(src_chunks, num_src_chunks);
				830	printf("target chunks:\n");
				831	DumpChunks(tgt_chunks, num_tgt_chunks);
				832	return 1;
				833	}
				834	for (i = 0; i < num_src_chunks; ++i) {
				835	if (src_chunks[i].type != tgt_chunks[i].type) {
				836	printf("source and target don't have same chunk "
				837	"structure! (chunk %d)\n", i);
				838	printf("source chunks:\n");
				839	DumpChunks(src_chunks, num_src_chunks);
				840	printf("target chunks:\n");
				841	DumpChunks(tgt_chunks, num_tgt_chunks);
				842	return 1;
				843	}
				844	}
				845	}
				846
				847	for (i = 0; i < num_tgt_chunks; ++i) {
				848	if (tgt_chunks[i].type == CHUNK_DEFLATE) {
				849	// Confirm that given the uncompressed chunk data in the target, we
				850	// can recompress it and get exactly the same bits as are in the
				851	// input target image. If this fails, treat the chunk as a normal
				852	// non-deflated chunk.
				853	if (ReconstructDeflateChunk(tgt_chunks+i) < 0) {
				854	printf("failed to reconstruct target deflate chunk %d [%s]; "
				855	"treating as normal\n", i, tgt_chunks[i].filename);
				856	ChangeDeflateChunkToNormal(tgt_chunks+i);
				857	if (zip_mode) {
				858	ImageChunk* src = FindChunkByName(tgt_chunks[i].filename, src_chunks, num_src_chunks);
				859	if (src) {
				860	ChangeDeflateChunkToNormal(src);
				861	}
				862	} else {
				863	ChangeDeflateChunkToNormal(src_chunks+i);
				864	}
				865	continue;
				866	}
				867
				868	// If two deflate chunks are identical (eg, the kernel has not
				869	// changed between two builds), treat them as normal chunks.
				870	// This makes applypatch much faster -- it can apply a trivial
				871	// patch to the compressed data, rather than uncompressing and
				872	// recompressing to apply the trivial patch to the uncompressed
				873	// data.
				874	ImageChunk* src;
				875	if (zip_mode) {
				876	src = FindChunkByName(tgt_chunks[i].filename, src_chunks, num_src_chunks);
				877	} else {
				878	src = src_chunks+i;
				879	}
				880
				881	if (src == NULL \|\| AreChunksEqual(tgt_chunks+i, src)) {
				882	ChangeDeflateChunkToNormal(tgt_chunks+i);
				883	if (src) {
				884	ChangeDeflateChunkToNormal(src);
				885	}
				886	}
				887	}
				888	}
				889
				890	// Merging neighboring normal chunks.
				891	if (zip_mode) {
				892	// For zips, we only need to do this to the target: deflated
				893	// chunks are matched via filename, and normal chunks are patched
				894	// using the entire source file as the source.
				895	MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks);
				896	} else {
				897	// For images, we need to maintain the parallel structure of the
				898	// chunk lists, so do the merging in both the source and target
				899	// lists.
				900	MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks);
				901	MergeAdjacentNormalChunks(src_chunks, &num_src_chunks);
				902	if (num_src_chunks != num_tgt_chunks) {
				903	// This shouldn't happen.
				904	printf("merging normal chunks went awry\n");
				905	return 1;
				906	}
				907	}
				908
				909	// Compute bsdiff patches for each chunk's data (the uncompressed
				910	// data, in the case of deflate chunks).
				911
				912	printf("Construct patches for %d chunks...\n", num_tgt_chunks);
				913	unsigned char** patch_data = malloc(num_tgt_chunks * sizeof(unsigned char*));
				914	size_t* patch_size = malloc(num_tgt_chunks * sizeof(size_t));
				915	for (i = 0; i < num_tgt_chunks; ++i) {
				916	if (zip_mode) {
				917	ImageChunk* src;
				918	if (tgt_chunks[i].type == CHUNK_DEFLATE &&
				919	(src = FindChunkByName(tgt_chunks[i].filename, src_chunks,
				920	num_src_chunks))) {
				921	patch_data[i] = MakePatch(src, tgt_chunks+i, patch_size+i);
				922	} else {
				923	patch_data[i] = MakePatch(src_chunks, tgt_chunks+i, patch_size+i);
				924	}
				925	} else {
				926	patch_data[i] = MakePatch(src_chunks+i, tgt_chunks+i, patch_size+i);
				927	}
				928	printf("patch %3d is %d bytes (of %d)\n",
				929	i, patch_size[i], tgt_chunks[i].source_len);
				930	}
				931
				932	// Figure out how big the imgdiff file header is going to be, so
				933	// that we can correctly compute the offset of each bsdiff patch
				934	// within the file.
				935
				936	size_t total_header_size = 12;
				937	for (i = 0; i < num_tgt_chunks; ++i) {
				938	total_header_size += 4;
				939	switch (tgt_chunks[i].type) {
				940	case CHUNK_NORMAL:
				941	total_header_size += 8*3;
				942	break;
				943	case CHUNK_DEFLATE:
				944	total_header_size += 85 + 45;
				945	break;
				946	case CHUNK_RAW:
				947	total_header_size += 4 + patch_size[i];
				948	break;
				949	}
				950	}
				951
				952	size_t offset = total_header_size;
				953
				954	FILE* f = fopen(argv[3], "wb");
				955
				956	// Write out the headers.
				957
				958	fwrite("IMGDIFF2", 1, 8, f);
				959	Write4(num_tgt_chunks, f);
				960	for (i = 0; i < num_tgt_chunks; ++i) {
				961	Write4(tgt_chunks[i].type, f);
				962
				963	switch (tgt_chunks[i].type) {
				964	case CHUNK_NORMAL:
				965	printf("chunk %3d: normal (%10d, %10d) %10d\n", i,
				966	tgt_chunks[i].start, tgt_chunks[i].len, patch_size[i]);
				967	Write8(tgt_chunks[i].source_start, f);
				968	Write8(tgt_chunks[i].source_len, f);
				969	Write8(offset, f);
				970	offset += patch_size[i];
				971	break;
				972
				973	case CHUNK_DEFLATE:
				974	printf("chunk %3d: deflate (%10d, %10d) %10d %s\n", i,
				975	tgt_chunks[i].start, tgt_chunks[i].deflate_len, patch_size[i],
				976	tgt_chunks[i].filename);
				977	Write8(tgt_chunks[i].source_start, f);
				978	Write8(tgt_chunks[i].source_len, f);
				979	Write8(offset, f);
				980	Write8(tgt_chunks[i].source_uncompressed_len, f);
				981	Write8(tgt_chunks[i].len, f);
				982	Write4(tgt_chunks[i].level, f);
				983	Write4(tgt_chunks[i].method, f);
				984	Write4(tgt_chunks[i].windowBits, f);
				985	Write4(tgt_chunks[i].memLevel, f);
				986	Write4(tgt_chunks[i].strategy, f);
				987	offset += patch_size[i];
				988	break;
				989
				990	case CHUNK_RAW:
				991	printf("chunk %3d: raw (%10d, %10d)\n", i,
				992	tgt_chunks[i].start, tgt_chunks[i].len);
				993	Write4(patch_size[i], f);
				994	fwrite(patch_data[i], 1, patch_size[i], f);
				995	break;
				996	}
				997	}
				998
				999	// Append each chunk's bsdiff patch, in order.
				1000
				1001	for (i = 0; i < num_tgt_chunks; ++i) {
				1002	if (tgt_chunks[i].type != CHUNK_RAW) {
				1003	fwrite(patch_data[i], 1, patch_size[i], f);
				1004	}
				1005	}
				1006
				1007	fclose(f);
				1008
				1009	return 0;
				1010	}