Blame - applypatch/imgdiff.c - android_bootable_recovery

blob: 05c4f250f101a8572567435c3509529f2f9c92ad [file] [log] [blame]

Doug Zongker	512536a	2010-02-17 16:11:44 -0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2009 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	/*
				18	* This program constructs binary patches for images -- such as boot.img
				19	* and recovery.img -- that consist primarily of large chunks of gzipped
				20	* data interspersed with uncompressed data. Doing a naive bsdiff of
				21	* these files is not useful because small changes in the data lead to
				22	* large changes in the compressed bitstream; bsdiff patches of gzipped
				23	* data are typically as large as the data itself.
				24	*
				25	* To patch these usefully, we break the source and target images up into
				26	* chunks of two types: "normal" and "gzip". Normal chunks are simply
				27	* patched using a plain bsdiff. Gzip chunks are first expanded, then a
				28	* bsdiff is applied to the uncompressed data, then the patched data is
				29	* gzipped using the same encoder parameters. Patched chunks are
				30	* concatenated together to create the output file; the output image
				31	* should be exactly the same series of bytes as the target image used
				32	* originally to generate the patch.
				33	*
				34	* To work well with this tool, the gzipped sections of the target
				35	* image must have been generated using the same deflate encoder that
				36	* is available in applypatch, namely, the one in the zlib library.
				37	* In practice this means that images should be compressed using the
				38	* "minigzip" tool included in the zlib distribution, not the GNU gzip
				39	* program.
				40	*
				41	* An "imgdiff" patch consists of a header describing the chunk structure
				42	* of the file and any encoding parameters needed for the gzipped
				43	* chunks, followed by N bsdiff patches, one per chunk.
				44	*
				45	* For a diff to be generated, the source and target images must have the
				46	* same "chunk" structure: that is, the same number of gzipped and normal
				47	* chunks in the same order. Android boot and recovery images currently
				48	* consist of five chunks: a small normal header, a gzipped kernel, a
				49	* small normal section, a gzipped ramdisk, and finally a small normal
				50	* footer.
				51	*
				52	* Caveats: we locate gzipped sections within the source and target
				53	* images by searching for the byte sequence 1f8b0800: 1f8b is the gzip
				54	* magic number; 08 specifies the "deflate" encoding [the only encoding
				55	* supported by the gzip standard]; and 00 is the flags byte. We do not
				56	* currently support any extra header fields (which would be indicated by
				57	* a nonzero flags byte). We also don't handle the case when that byte
				58	* sequence appears spuriously in the file. (Note that it would have to
				59	* occur spuriously within a normal chunk to be a problem.)
				60	*
				61	*
				62	* The imgdiff patch header looks like this:
				63	*
				64	* "IMGDIFF1" (8) [magic number and version]
				65	* chunk count (4)
				66	* for each chunk:
				67	* chunk type (4) [CHUNK_{NORMAL, GZIP, DEFLATE, RAW}]
				68	* if chunk type == CHUNK_NORMAL:
				69	* source start (8)
				70	* source len (8)
				71	* bsdiff patch offset (8) [from start of patch file]
				72	* if chunk type == CHUNK_GZIP: (version 1 only)
				73	* source start (8)
				74	* source len (8)
				75	* bsdiff patch offset (8) [from start of patch file]
				76	* source expanded len (8) [size of uncompressed source]
				77	* target expected len (8) [size of uncompressed target]
				78	* gzip level (4)
				79	* method (4)
				80	* windowBits (4)
				81	* memLevel (4)
				82	* strategy (4)
				83	* gzip header len (4)
				84	* gzip header (gzip header len)
				85	* gzip footer (8)
				86	* if chunk type == CHUNK_DEFLATE: (version 2 only)
				87	* source start (8)
				88	* source len (8)
				89	* bsdiff patch offset (8) [from start of patch file]
				90	* source expanded len (8) [size of uncompressed source]
				91	* target expected len (8) [size of uncompressed target]
				92	* gzip level (4)
				93	* method (4)
				94	* windowBits (4)
				95	* memLevel (4)
				96	* strategy (4)
				97	* if chunk type == RAW: (version 2 only)
				98	* target len (4)
				99	* data (target len)
				100	*
				101	* All integers are little-endian. "source start" and "source len"
				102	* specify the section of the input image that comprises this chunk,
				103	* including the gzip header and footer for gzip chunks. "source
				104	* expanded len" is the size of the uncompressed source data. "target
				105	* expected len" is the size of the uncompressed data after applying
				106	* the bsdiff patch. The next five parameters specify the zlib
				107	* parameters to be used when compressing the patched data, and the
				108	* next three specify the header and footer to be wrapped around the
				109	* compressed data to create the output chunk (so that header contents
				110	* like the timestamp are recreated exactly).
				111	*
				112	* After the header there are 'chunk count' bsdiff patches; the offset
				113	* of each from the beginning of the file is specified in the header.
Doug Zongker	a3ccba6	2012-08-20 15:28:02 -0700	[diff] [blame]	114	*
				115	* This tool can take an optional file of "bonus data". This is an
				116	* extra file of data that is appended to chunk #1 after it is
				117	* compressed (it must be a CHUNK_DEFLATE chunk). The same file must
				118	* be available (and passed to applypatch with -b) when applying the
				119	* patch. This is used to reduce the size of recovery-from-boot
				120	* patches by combining the boot image with recovery ramdisk
				121	* information that is stored on the system partition.
Doug Zongker	512536a	2010-02-17 16:11:44 -0800	[diff] [blame]	122	*/
				123
				124	#include <errno.h>
				125	#include <stdio.h>
				126	#include <stdlib.h>
				127	#include <string.h>
				128	#include <sys/stat.h>
				129	#include <unistd.h>
				130	#include <sys/types.h>
				131
				132	#include "zlib.h"
				133	#include "imgdiff.h"
				134	#include "utils.h"
				135
				136	typedef struct {
				137	int type; // CHUNK_NORMAL, CHUNK_DEFLATE
				138	size_t start; // offset of chunk in original image file
				139
				140	size_t len;
				141	unsigned char* data; // data to be patched (uncompressed, for deflate chunks)
				142
				143	size_t source_start;
				144	size_t source_len;
				145
				146	off_t* I; // used by bsdiff
				147
				148	// --- for CHUNK_DEFLATE chunks only: ---
				149
				150	// original (compressed) deflate data
				151	size_t deflate_len;
				152	unsigned char* deflate_data;
				153
				154	char* filename; // used for zip entries
				155
				156	// deflate encoder parameters
				157	int level, method, windowBits, memLevel, strategy;
				158
				159	size_t source_uncompressed_len;
				160	} ImageChunk;
				161
				162	typedef struct {
				163	int data_offset;
				164	int deflate_len;
				165	int uncomp_len;
				166	char* filename;
				167	} ZipFileEntry;
				168
				169	static int fileentry_compare(const void* a, const void* b) {
				170	int ao = ((ZipFileEntry*)a)->data_offset;
				171	int bo = ((ZipFileEntry*)b)->data_offset;
				172	if (ao < bo) {
				173	return -1;
				174	} else if (ao > bo) {
				175	return 1;
				176	} else {
				177	return 0;
				178	}
				179	}
				180
				181	// from bsdiff.c
				182	int bsdiff(u_char* old, off_t oldsize, off_t** IP, u_char* new, off_t newsize,
				183	const char* patch_filename);
				184
				185	unsigned char* ReadZip(const char* filename,
				186	int* num_chunks, ImageChunk** chunks,
				187	int include_pseudo_chunk) {
				188	struct stat st;
				189	if (stat(filename, &st) != 0) {
				190	printf("failed to stat \"%s\": %s\n", filename, strerror(errno));
				191	return NULL;
				192	}
				193
				194	unsigned char* img = malloc(st.st_size);
				195	FILE* f = fopen(filename, "rb");
				196	if (fread(img, 1, st.st_size, f) != st.st_size) {
				197	printf("failed to read \"%s\" %s\n", filename, strerror(errno));
				198	fclose(f);
				199	return NULL;
				200	}
				201	fclose(f);
				202
				203	// look for the end-of-central-directory record.
				204
				205	int i;
				206	for (i = st.st_size-20; i >= 0 && i > st.st_size - 65600; --i) {
				207	if (img[i] == 0x50 && img[i+1] == 0x4b &&
				208	img[i+2] == 0x05 && img[i+3] == 0x06) {
				209	break;
				210	}
				211	}
				212	// double-check: this archive consists of a single "disk"
				213	if (!(img[i+4] == 0 && img[i+5] == 0 && img[i+6] == 0 && img[i+7] == 0)) {
				214	printf("can't process multi-disk archive\n");
				215	return NULL;
				216	}
				217
				218	int cdcount = Read2(img+i+8);
				219	int cdoffset = Read4(img+i+16);
				220
				221	ZipFileEntry* temp_entries = malloc(cdcount * sizeof(ZipFileEntry));
				222	int entrycount = 0;
				223
				224	unsigned char* cd = img+cdoffset;
				225	for (i = 0; i < cdcount; ++i) {
				226	if (!(cd[0] == 0x50 && cd[1] == 0x4b && cd[2] == 0x01 && cd[3] == 0x02)) {
				227	printf("bad central directory entry %d\n", i);
				228	return NULL;
				229	}
				230
				231	int clen = Read4(cd+20); // compressed len
				232	int ulen = Read4(cd+24); // uncompressed len
				233	int nlen = Read2(cd+28); // filename len
				234	int xlen = Read2(cd+30); // extra field len
				235	int mlen = Read2(cd+32); // file comment len
				236	int hoffset = Read4(cd+42); // local header offset
				237
				238	char* filename = malloc(nlen+1);
				239	memcpy(filename, cd+46, nlen);
				240	filename[nlen] = '\0';
				241
				242	int method = Read2(cd+10);
				243
				244	cd += 46 + nlen + xlen + mlen;
				245
				246	if (method != 8) { // 8 == deflate
				247	free(filename);
				248	continue;
				249	}
				250
				251	unsigned char* lh = img + hoffset;
				252
				253	if (!(lh[0] == 0x50 && lh[1] == 0x4b && lh[2] == 0x03 && lh[3] == 0x04)) {
				254	printf("bad local file header entry %d\n", i);
				255	return NULL;
				256	}
				257
				258	if (Read2(lh+26) != nlen \|\| memcmp(lh+30, filename, nlen) != 0) {
				259	printf("central dir filename doesn't match local header\n");
				260	return NULL;
				261	}
				262
				263	xlen = Read2(lh+28); // extra field len; might be different from CD entry?
				264
				265	temp_entries[entrycount].data_offset = hoffset+30+nlen+xlen;
				266	temp_entries[entrycount].deflate_len = clen;
				267	temp_entries[entrycount].uncomp_len = ulen;
				268	temp_entries[entrycount].filename = filename;
				269	++entrycount;
				270	}
				271
				272	qsort(temp_entries, entrycount, sizeof(ZipFileEntry), fileentry_compare);
				273
				274	#if 0
				275	printf("found %d deflated entries\n", entrycount);
				276	for (i = 0; i < entrycount; ++i) {
				277	printf("off %10d len %10d unlen %10d %p %s\n",
				278	temp_entries[i].data_offset,
				279	temp_entries[i].deflate_len,
				280	temp_entries[i].uncomp_len,
				281	temp_entries[i].filename,
				282	temp_entries[i].filename);
				283	}
				284	#endif
				285
				286	*num_chunks = 0;
				287	chunks = malloc((entrycount2+2) * sizeof(ImageChunk));
				288	ImageChunk* curr = *chunks;
				289
				290	if (include_pseudo_chunk) {
				291	curr->type = CHUNK_NORMAL;
				292	curr->start = 0;
				293	curr->len = st.st_size;
				294	curr->data = img;
				295	curr->filename = NULL;
				296	curr->I = NULL;
				297	++curr;
				298	++*num_chunks;
				299	}
				300
				301	int pos = 0;
				302	int nextentry = 0;
				303
				304	while (pos < st.st_size) {
				305	if (nextentry < entrycount && pos == temp_entries[nextentry].data_offset) {
				306	curr->type = CHUNK_DEFLATE;
				307	curr->start = pos;
				308	curr->deflate_len = temp_entries[nextentry].deflate_len;
				309	curr->deflate_data = img + pos;
				310	curr->filename = temp_entries[nextentry].filename;
				311	curr->I = NULL;
				312
				313	curr->len = temp_entries[nextentry].uncomp_len;
				314	curr->data = malloc(curr->len);
				315
				316	z_stream strm;
				317	strm.zalloc = Z_NULL;
				318	strm.zfree = Z_NULL;
				319	strm.opaque = Z_NULL;
				320	strm.avail_in = curr->deflate_len;
				321	strm.next_in = curr->deflate_data;
				322
				323	// -15 means we are decoding a 'raw' deflate stream; zlib will
				324	// not expect zlib headers.
				325	int ret = inflateInit2(&strm, -15);
				326
				327	strm.avail_out = curr->len;
				328	strm.next_out = curr->data;
				329	ret = inflate(&strm, Z_NO_FLUSH);
				330	if (ret != Z_STREAM_END) {
				331	printf("failed to inflate \"%s\"; %d\n", curr->filename, ret);
				332	return NULL;
				333	}
				334
				335	inflateEnd(&strm);
				336
				337	pos += curr->deflate_len;
				338	++nextentry;
				339	++*num_chunks;
				340	++curr;
				341	continue;
				342	}
				343
				344	// use a normal chunk to take all the data up to the start of the
				345	// next deflate section.
				346
				347	curr->type = CHUNK_NORMAL;
				348	curr->start = pos;
				349	if (nextentry < entrycount) {
				350	curr->len = temp_entries[nextentry].data_offset - pos;
				351	} else {
				352	curr->len = st.st_size - pos;
				353	}
				354	curr->data = img + pos;
				355	curr->filename = NULL;
				356	curr->I = NULL;
				357	pos += curr->len;
				358
				359	++*num_chunks;
				360	++curr;
				361	}
				362
				363	free(temp_entries);
				364	return img;
				365	}
				366
				367	/*
				368	* Read the given file and break it up into chunks, putting the number
				369	* of chunks and their info in num_chunks and *chunks,
				370	* respectively. Returns a malloc'd block of memory containing the
				371	* contents of the file; various pointers in the output chunk array
				372	* will point into this block of memory. The caller should free the
				373	* return value when done with all the chunks. Returns NULL on
				374	* failure.
				375	*/
				376	unsigned char* ReadImage(const char* filename,
				377	int* num_chunks, ImageChunk** chunks) {
				378	struct stat st;
				379	if (stat(filename, &st) != 0) {
				380	printf("failed to stat \"%s\": %s\n", filename, strerror(errno));
				381	return NULL;
				382	}
				383
				384	unsigned char* img = malloc(st.st_size + 4);
				385	FILE* f = fopen(filename, "rb");
				386	if (fread(img, 1, st.st_size, f) != st.st_size) {
				387	printf("failed to read \"%s\" %s\n", filename, strerror(errno));
				388	fclose(f);
				389	return NULL;
				390	}
				391	fclose(f);
				392
				393	// append 4 zero bytes to the data so we can always search for the
				394	// four-byte string 1f8b0800 starting at any point in the actual
				395	// file data, without special-casing the end of the data.
				396	memset(img+st.st_size, 0, 4);
				397
				398	size_t pos = 0;
				399
				400	*num_chunks = 0;
				401	*chunks = NULL;
				402
				403	while (pos < st.st_size) {
				404	unsigned char* p = img+pos;
				405
				406	if (st.st_size - pos >= 4 &&
				407	p[0] == 0x1f && p[1] == 0x8b &&
				408	p[2] == 0x08 && // deflate compression
				409	p[3] == 0x00) { // no header flags
				410	// 'pos' is the offset of the start of a gzip chunk.
				411
				412	*num_chunks += 3;
				413	chunks = realloc(chunks, num_chunks sizeof(ImageChunk));
				414	ImageChunk* curr = chunks + (num_chunks-3);
				415
				416	// create a normal chunk for the header.
				417	curr->start = pos;
				418	curr->type = CHUNK_NORMAL;
				419	curr->len = GZIP_HEADER_LEN;
				420	curr->data = p;
				421	curr->I = NULL;
				422
				423	pos += curr->len;
				424	p += curr->len;
				425	++curr;
				426
				427	curr->type = CHUNK_DEFLATE;
				428	curr->filename = NULL;
				429	curr->I = NULL;
				430
				431	// We must decompress this chunk in order to discover where it
				432	// ends, and so we can put the uncompressed data and its length
				433	// into curr->data and curr->len.
				434
				435	size_t allocated = 32768;
				436	curr->len = 0;
				437	curr->data = malloc(allocated);
				438	curr->start = pos;
				439	curr->deflate_data = p;
				440
				441	z_stream strm;
				442	strm.zalloc = Z_NULL;
				443	strm.zfree = Z_NULL;
				444	strm.opaque = Z_NULL;
				445	strm.avail_in = st.st_size - pos;
				446	strm.next_in = p;
				447
				448	// -15 means we are decoding a 'raw' deflate stream; zlib will
				449	// not expect zlib headers.
				450	int ret = inflateInit2(&strm, -15);
				451
				452	do {
				453	strm.avail_out = allocated - curr->len;
				454	strm.next_out = curr->data + curr->len;
				455	ret = inflate(&strm, Z_NO_FLUSH);
				456	curr->len = allocated - strm.avail_out;
				457	if (strm.avail_out == 0) {
				458	allocated *= 2;
				459	curr->data = realloc(curr->data, allocated);
				460	}
				461	} while (ret != Z_STREAM_END);
				462
				463	curr->deflate_len = st.st_size - strm.avail_in - pos;
				464	inflateEnd(&strm);
				465	pos += curr->deflate_len;
				466	p += curr->deflate_len;
				467	++curr;
				468
				469	// create a normal chunk for the footer
				470
				471	curr->type = CHUNK_NORMAL;
				472	curr->start = pos;
				473	curr->len = GZIP_FOOTER_LEN;
				474	curr->data = img+pos;
				475	curr->I = NULL;
				476
				477	pos += curr->len;
				478	p += curr->len;
				479	++curr;
				480
				481	// The footer (that we just skipped over) contains the size of
				482	// the uncompressed data. Double-check to make sure that it
				483	// matches the size of the data we got when we actually did
				484	// the decompression.
				485	size_t footer_size = Read4(p-4);
				486	if (footer_size != curr[-2].len) {
				487	printf("Error: footer size %d != decompressed size %d\n",
				488	footer_size, curr[-2].len);
				489	free(img);
				490	return NULL;
				491	}
				492	} else {
				493	// Reallocate the list for every chunk; we expect the number of
				494	// chunks to be small (5 for typical boot and recovery images).
				495	++*num_chunks;
				496	chunks = realloc(chunks, num_chunks sizeof(ImageChunk));
				497	ImageChunk* curr = chunks + (num_chunks-1);
				498	curr->start = pos;
				499	curr->I = NULL;
				500
				501	// 'pos' is not the offset of the start of a gzip chunk, so scan
				502	// forward until we find a gzip header.
				503	curr->type = CHUNK_NORMAL;
				504	curr->data = p;
				505
				506	for (curr->len = 0; curr->len < (st.st_size - pos); ++curr->len) {
				507	if (p[curr->len] == 0x1f &&
				508	p[curr->len+1] == 0x8b &&
				509	p[curr->len+2] == 0x08 &&
				510	p[curr->len+3] == 0x00) {
				511	break;
				512	}
				513	}
				514	pos += curr->len;
				515	}
				516	}
				517
				518	return img;
				519	}
				520
				521	#define BUFFER_SIZE 32768
				522
				523	/*
				524	* Takes the uncompressed data stored in the chunk, compresses it
				525	* using the zlib parameters stored in the chunk, and checks that it
				526	* matches exactly the compressed data we started with (also stored in
				527	* the chunk). Return 0 on success.
				528	*/
				529	int TryReconstruction(ImageChunk* chunk, unsigned char* out) {
				530	size_t p = 0;
				531
				532	#if 0
				533	printf("trying %d %d %d %d %d\n",
				534	chunk->level, chunk->method, chunk->windowBits,
				535	chunk->memLevel, chunk->strategy);
				536	#endif
				537
				538	z_stream strm;
				539	strm.zalloc = Z_NULL;
				540	strm.zfree = Z_NULL;
				541	strm.opaque = Z_NULL;
				542	strm.avail_in = chunk->len;
				543	strm.next_in = chunk->data;
				544	int ret;
				545	ret = deflateInit2(&strm, chunk->level, chunk->method, chunk->windowBits,
				546	chunk->memLevel, chunk->strategy);
				547	do {
				548	strm.avail_out = BUFFER_SIZE;
				549	strm.next_out = out;
				550	ret = deflate(&strm, Z_FINISH);
				551	size_t have = BUFFER_SIZE - strm.avail_out;
				552
				553	if (memcmp(out, chunk->deflate_data+p, have) != 0) {
				554	// mismatch; data isn't the same.
				555	deflateEnd(&strm);
				556	return -1;
				557	}
				558	p += have;
				559	} while (ret != Z_STREAM_END);
				560	deflateEnd(&strm);
				561	if (p != chunk->deflate_len) {
				562	// mismatch; ran out of data before we should have.
				563	return -1;
				564	}
				565	return 0;
				566	}
				567
				568	/*
				569	* Verify that we can reproduce exactly the same compressed data that
				570	* we started with. Sets the level, method, windowBits, memLevel, and
				571	* strategy fields in the chunk to the encoding parameters needed to
				572	* produce the right output. Returns 0 on success.
				573	*/
				574	int ReconstructDeflateChunk(ImageChunk* chunk) {
				575	if (chunk->type != CHUNK_DEFLATE) {
				576	printf("attempt to reconstruct non-deflate chunk\n");
				577	return -1;
				578	}
				579
				580	size_t p = 0;
				581	unsigned char* out = malloc(BUFFER_SIZE);
				582
				583	// We only check two combinations of encoder parameters: level 6
				584	// (the default) and level 9 (the maximum).
				585	for (chunk->level = 6; chunk->level <= 9; chunk->level += 3) {
				586	chunk->windowBits = -15; // 32kb window; negative to indicate a raw stream.
				587	chunk->memLevel = 8; // the default value.
				588	chunk->method = Z_DEFLATED;
				589	chunk->strategy = Z_DEFAULT_STRATEGY;
				590
				591	if (TryReconstruction(chunk, out) == 0) {
				592	free(out);
				593	return 0;
				594	}
				595	}
				596
				597	free(out);
				598	return -1;
				599	}
				600
				601	/*
				602	* Given source and target chunks, compute a bsdiff patch between them
				603	* by running bsdiff in a subprocess. Return the patch data, placing
				604	* its length in *size. Return NULL on failure. We expect the bsdiff
				605	* program to be in the path.
				606	*/
				607	unsigned char* MakePatch(ImageChunk* src, ImageChunk* tgt, size_t* size) {
				608	if (tgt->type == CHUNK_NORMAL) {
				609	if (tgt->len <= 160) {
				610	tgt->type = CHUNK_RAW;
				611	*size = tgt->len;
				612	return tgt->data;
				613	}
				614	}
				615
				616	char ptemp[] = "/tmp/imgdiff-patch-XXXXXX";
				617	mkstemp(ptemp);
				618
				619	int r = bsdiff(src->data, src->len, &(src->I), tgt->data, tgt->len, ptemp);
				620	if (r != 0) {
				621	printf("bsdiff() failed: %d\n", r);
				622	return NULL;
				623	}
				624
				625	struct stat st;
				626	if (stat(ptemp, &st) != 0) {
				627	printf("failed to stat patch file %s: %s\n",
				628	ptemp, strerror(errno));
				629	return NULL;
				630	}
				631
				632	unsigned char* data = malloc(st.st_size);
				633
				634	if (tgt->type == CHUNK_NORMAL && tgt->len <= st.st_size) {
				635	unlink(ptemp);
				636
				637	tgt->type = CHUNK_RAW;
				638	*size = tgt->len;
				639	return tgt->data;
				640	}
				641
				642	*size = st.st_size;
				643
				644	FILE* f = fopen(ptemp, "rb");
				645	if (f == NULL) {
				646	printf("failed to open patch %s: %s\n", ptemp, strerror(errno));
				647	return NULL;
				648	}
				649	if (fread(data, 1, st.st_size, f) != st.st_size) {
				650	printf("failed to read patch %s: %s\n", ptemp, strerror(errno));
				651	return NULL;
				652	}
				653	fclose(f);
				654
				655	unlink(ptemp);
				656
				657	tgt->source_start = src->start;
				658	switch (tgt->type) {
				659	case CHUNK_NORMAL:
				660	tgt->source_len = src->len;
				661	break;
				662	case CHUNK_DEFLATE:
				663	tgt->source_len = src->deflate_len;
				664	tgt->source_uncompressed_len = src->len;
				665	break;
				666	}
				667
				668	return data;
				669	}
				670
				671	/*
				672	* Cause a gzip chunk to be treated as a normal chunk (ie, as a blob
				673	* of uninterpreted data). The resulting patch will likely be about
				674	* as big as the target file, but it lets us handle the case of images
				675	* where some gzip chunks are reconstructible but others aren't (by
				676	* treating the ones that aren't as normal chunks).
				677	*/
				678	void ChangeDeflateChunkToNormal(ImageChunk* ch) {
				679	if (ch->type != CHUNK_DEFLATE) return;
				680	ch->type = CHUNK_NORMAL;
				681	free(ch->data);
				682	ch->data = ch->deflate_data;
				683	ch->len = ch->deflate_len;
				684	}
				685
				686	/*
				687	* Return true if the data in the chunk is identical (including the
				688	* compressed representation, for gzip chunks).
				689	*/
				690	int AreChunksEqual(ImageChunk* a, ImageChunk* b) {
				691	if (a->type != b->type) return 0;
				692
				693	switch (a->type) {
				694	case CHUNK_NORMAL:
				695	return a->len == b->len && memcmp(a->data, b->data, a->len) == 0;
				696
				697	case CHUNK_DEFLATE:
				698	return a->deflate_len == b->deflate_len &&
				699	memcmp(a->deflate_data, b->deflate_data, a->deflate_len) == 0;
				700
				701	default:
				702	printf("unknown chunk type %d\n", a->type);
				703	return 0;
				704	}
				705	}
				706
				707	/*
				708	* Look for runs of adjacent normal chunks and compress them down into
				709	* a single chunk. (Such runs can be produced when deflate chunks are
				710	* changed to normal chunks.)
				711	*/
				712	void MergeAdjacentNormalChunks(ImageChunk* chunks, int* num_chunks) {
				713	int out = 0;
				714	int in_start = 0, in_end;
				715	while (in_start < *num_chunks) {
				716	if (chunks[in_start].type != CHUNK_NORMAL) {
				717	in_end = in_start+1;
				718	} else {
				719	// in_start is a normal chunk. Look for a run of normal chunks
				720	// that constitute a solid block of data (ie, each chunk begins
				721	// where the previous one ended).
				722	for (in_end = in_start+1;
				723	in_end < *num_chunks && chunks[in_end].type == CHUNK_NORMAL &&
				724	(chunks[in_end].start ==
				725	chunks[in_end-1].start + chunks[in_end-1].len &&
				726	chunks[in_end].data ==
				727	chunks[in_end-1].data + chunks[in_end-1].len);
				728	++in_end);
				729	}
				730
				731	if (in_end == in_start+1) {
				732	#if 0
				733	printf("chunk %d is now %d\n", in_start, out);
				734	#endif
				735	if (out != in_start) {
				736	memcpy(chunks+out, chunks+in_start, sizeof(ImageChunk));
				737	}
				738	} else {
				739	#if 0
				740	printf("collapse normal chunks %d-%d into %d\n", in_start, in_end-1, out);
				741	#endif
				742
				743	// Merge chunks [in_start, in_end-1] into one chunk. Since the
				744	// data member of each chunk is just a pointer into an in-memory
				745	// copy of the file, this can be done without recopying (the
				746	// output chunk has the first chunk's start location and data
				747	// pointer, and length equal to the sum of the input chunk
				748	// lengths).
				749	chunks[out].type = CHUNK_NORMAL;
				750	chunks[out].start = chunks[in_start].start;
				751	chunks[out].data = chunks[in_start].data;
				752	chunks[out].len = chunks[in_end-1].len +
				753	(chunks[in_end-1].start - chunks[in_start].start);
				754	}
				755
				756	++out;
				757	in_start = in_end;
				758	}
				759	*num_chunks = out;
				760	}
				761
				762	ImageChunk* FindChunkByName(const char* name,
				763	ImageChunk* chunks, int num_chunks) {
				764	int i;
				765	for (i = 0; i < num_chunks; ++i) {
				766	if (chunks[i].type == CHUNK_DEFLATE && chunks[i].filename &&
				767	strcmp(name, chunks[i].filename) == 0) {
				768	return chunks+i;
				769	}
				770	}
				771	return NULL;
				772	}
				773
				774	void DumpChunks(ImageChunk* chunks, int num_chunks) {
				775	int i;
				776	for (i = 0; i < num_chunks; ++i) {
				777	printf("chunk %d: type %d start %d len %d\n",
				778	i, chunks[i].type, chunks[i].start, chunks[i].len);
				779	}
				780	}
				781
				782	int main(int argc, char** argv) {
Doug Zongker	512536a	2010-02-17 16:11:44 -0800	[diff] [blame]	783	int zip_mode = 0;
				784
Doug Zongker	a3ccba6	2012-08-20 15:28:02 -0700	[diff] [blame]	785	if (argc >= 2 && strcmp(argv[1], "-z") == 0) {
Doug Zongker	512536a	2010-02-17 16:11:44 -0800	[diff] [blame]	786	zip_mode = 1;
				787	--argc;
				788	++argv;
				789	}
				790
Doug Zongker	a3ccba6	2012-08-20 15:28:02 -0700	[diff] [blame]	791	size_t bonus_size = 0;
				792	unsigned char* bonus_data = NULL;
				793	if (argc >= 3 && strcmp(argv[1], "-b") == 0) {
				794	struct stat st;
				795	if (stat(argv[2], &st) != 0) {
				796	printf("failed to stat bonus file %s: %s\n", argv[2], strerror(errno));
				797	return 1;
				798	}
				799	bonus_size = st.st_size;
				800	bonus_data = malloc(bonus_size);
				801	FILE* f = fopen(argv[2], "rb");
				802	if (f == NULL) {
				803	printf("failed to open bonus file %s: %s\n", argv[2], strerror(errno));
				804	return 1;
				805	}
				806	if (fread(bonus_data, 1, bonus_size, f) != bonus_size) {
				807	printf("failed to read bonus file %s: %s\n", argv[2], strerror(errno));
				808	return 1;
				809	}
				810	fclose(f);
				811
				812	argc -= 2;
				813	argv += 2;
				814	}
				815
				816	if (argc != 4) {
				817	usage:
				818	printf("usage: %s [-z] [-b <bonus-file>] <src-img> <tgt-img> <patch-file>\n",
				819	argv[0]);
				820	return 2;
				821	}
Doug Zongker	512536a	2010-02-17 16:11:44 -0800	[diff] [blame]	822
				823	int num_src_chunks;
				824	ImageChunk* src_chunks;
				825	int num_tgt_chunks;
				826	ImageChunk* tgt_chunks;
				827	int i;
				828
				829	if (zip_mode) {
				830	if (ReadZip(argv[1], &num_src_chunks, &src_chunks, 1) == NULL) {
				831	printf("failed to break apart source zip file\n");
				832	return 1;
				833	}
				834	if (ReadZip(argv[2], &num_tgt_chunks, &tgt_chunks, 0) == NULL) {
				835	printf("failed to break apart target zip file\n");
				836	return 1;
				837	}
				838	} else {
				839	if (ReadImage(argv[1], &num_src_chunks, &src_chunks) == NULL) {
				840	printf("failed to break apart source image\n");
				841	return 1;
				842	}
				843	if (ReadImage(argv[2], &num_tgt_chunks, &tgt_chunks) == NULL) {
				844	printf("failed to break apart target image\n");
				845	return 1;
				846	}
				847
				848	// Verify that the source and target images have the same chunk
				849	// structure (ie, the same sequence of deflate and normal chunks).
				850
				851	if (!zip_mode) {
				852	// Merge the gzip header and footer in with any adjacent
				853	// normal chunks.
				854	MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks);
				855	MergeAdjacentNormalChunks(src_chunks, &num_src_chunks);
				856	}
				857
				858	if (num_src_chunks != num_tgt_chunks) {
				859	printf("source and target don't have same number of chunks!\n");
				860	printf("source chunks:\n");
				861	DumpChunks(src_chunks, num_src_chunks);
				862	printf("target chunks:\n");
				863	DumpChunks(tgt_chunks, num_tgt_chunks);
				864	return 1;
				865	}
				866	for (i = 0; i < num_src_chunks; ++i) {
				867	if (src_chunks[i].type != tgt_chunks[i].type) {
				868	printf("source and target don't have same chunk "
				869	"structure! (chunk %d)\n", i);
				870	printf("source chunks:\n");
				871	DumpChunks(src_chunks, num_src_chunks);
				872	printf("target chunks:\n");
				873	DumpChunks(tgt_chunks, num_tgt_chunks);
				874	return 1;
				875	}
				876	}
				877	}
				878
				879	for (i = 0; i < num_tgt_chunks; ++i) {
				880	if (tgt_chunks[i].type == CHUNK_DEFLATE) {
				881	// Confirm that given the uncompressed chunk data in the target, we
				882	// can recompress it and get exactly the same bits as are in the
				883	// input target image. If this fails, treat the chunk as a normal
				884	// non-deflated chunk.
				885	if (ReconstructDeflateChunk(tgt_chunks+i) < 0) {
				886	printf("failed to reconstruct target deflate chunk %d [%s]; "
				887	"treating as normal\n", i, tgt_chunks[i].filename);
				888	ChangeDeflateChunkToNormal(tgt_chunks+i);
				889	if (zip_mode) {
				890	ImageChunk* src = FindChunkByName(tgt_chunks[i].filename, src_chunks, num_src_chunks);
				891	if (src) {
				892	ChangeDeflateChunkToNormal(src);
				893	}
				894	} else {
				895	ChangeDeflateChunkToNormal(src_chunks+i);
				896	}
				897	continue;
				898	}
				899
				900	// If two deflate chunks are identical (eg, the kernel has not
				901	// changed between two builds), treat them as normal chunks.
				902	// This makes applypatch much faster -- it can apply a trivial
				903	// patch to the compressed data, rather than uncompressing and
				904	// recompressing to apply the trivial patch to the uncompressed
				905	// data.
				906	ImageChunk* src;
				907	if (zip_mode) {
				908	src = FindChunkByName(tgt_chunks[i].filename, src_chunks, num_src_chunks);
				909	} else {
				910	src = src_chunks+i;
				911	}
				912
				913	if (src == NULL \|\| AreChunksEqual(tgt_chunks+i, src)) {
				914	ChangeDeflateChunkToNormal(tgt_chunks+i);
				915	if (src) {
				916	ChangeDeflateChunkToNormal(src);
				917	}
				918	}
				919	}
				920	}
				921
				922	// Merging neighboring normal chunks.
				923	if (zip_mode) {
				924	// For zips, we only need to do this to the target: deflated
				925	// chunks are matched via filename, and normal chunks are patched
				926	// using the entire source file as the source.
				927	MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks);
				928	} else {
				929	// For images, we need to maintain the parallel structure of the
				930	// chunk lists, so do the merging in both the source and target
				931	// lists.
				932	MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks);
				933	MergeAdjacentNormalChunks(src_chunks, &num_src_chunks);
				934	if (num_src_chunks != num_tgt_chunks) {
				935	// This shouldn't happen.
				936	printf("merging normal chunks went awry\n");
				937	return 1;
				938	}
				939	}
				940
				941	// Compute bsdiff patches for each chunk's data (the uncompressed
				942	// data, in the case of deflate chunks).
				943
Doug Zongker	a3ccba6	2012-08-20 15:28:02 -0700	[diff] [blame]	944	DumpChunks(src_chunks, num_src_chunks);
				945
Doug Zongker	512536a	2010-02-17 16:11:44 -0800	[diff] [blame]	946	printf("Construct patches for %d chunks...\n", num_tgt_chunks);
				947	unsigned char** patch_data = malloc(num_tgt_chunks * sizeof(unsigned char*));
				948	size_t* patch_size = malloc(num_tgt_chunks * sizeof(size_t));
				949	for (i = 0; i < num_tgt_chunks; ++i) {
				950	if (zip_mode) {
				951	ImageChunk* src;
				952	if (tgt_chunks[i].type == CHUNK_DEFLATE &&
				953	(src = FindChunkByName(tgt_chunks[i].filename, src_chunks,
				954	num_src_chunks))) {
				955	patch_data[i] = MakePatch(src, tgt_chunks+i, patch_size+i);
				956	} else {
				957	patch_data[i] = MakePatch(src_chunks, tgt_chunks+i, patch_size+i);
				958	}
				959	} else {
Doug Zongker	a3ccba6	2012-08-20 15:28:02 -0700	[diff] [blame]	960	if (i == 1 && bonus_data) {
				961	printf(" using %d bytes of bonus data for chunk %d\n", bonus_size, i);
				962	src_chunks[i].data = realloc(src_chunks[i].data, src_chunks[i].len + bonus_size);
				963	memcpy(src_chunks[i].data+src_chunks[i].len, bonus_data, bonus_size);
				964	src_chunks[i].len += bonus_size;
				965	}
				966
Doug Zongker	512536a	2010-02-17 16:11:44 -0800	[diff] [blame]	967	patch_data[i] = MakePatch(src_chunks+i, tgt_chunks+i, patch_size+i);
				968	}
				969	printf("patch %3d is %d bytes (of %d)\n",
				970	i, patch_size[i], tgt_chunks[i].source_len);
				971	}
				972
				973	// Figure out how big the imgdiff file header is going to be, so
				974	// that we can correctly compute the offset of each bsdiff patch
				975	// within the file.
				976
				977	size_t total_header_size = 12;
				978	for (i = 0; i < num_tgt_chunks; ++i) {
				979	total_header_size += 4;
				980	switch (tgt_chunks[i].type) {
				981	case CHUNK_NORMAL:
				982	total_header_size += 8*3;
				983	break;
				984	case CHUNK_DEFLATE:
				985	total_header_size += 85 + 45;
				986	break;
				987	case CHUNK_RAW:
				988	total_header_size += 4 + patch_size[i];
				989	break;
				990	}
				991	}
				992
				993	size_t offset = total_header_size;
				994
				995	FILE* f = fopen(argv[3], "wb");
				996
				997	// Write out the headers.
				998
				999	fwrite("IMGDIFF2", 1, 8, f);
				1000	Write4(num_tgt_chunks, f);
				1001	for (i = 0; i < num_tgt_chunks; ++i) {
				1002	Write4(tgt_chunks[i].type, f);
				1003
				1004	switch (tgt_chunks[i].type) {
				1005	case CHUNK_NORMAL:
				1006	printf("chunk %3d: normal (%10d, %10d) %10d\n", i,
				1007	tgt_chunks[i].start, tgt_chunks[i].len, patch_size[i]);
				1008	Write8(tgt_chunks[i].source_start, f);
				1009	Write8(tgt_chunks[i].source_len, f);
				1010	Write8(offset, f);
				1011	offset += patch_size[i];
				1012	break;
				1013
				1014	case CHUNK_DEFLATE:
				1015	printf("chunk %3d: deflate (%10d, %10d) %10d %s\n", i,
				1016	tgt_chunks[i].start, tgt_chunks[i].deflate_len, patch_size[i],
				1017	tgt_chunks[i].filename);
				1018	Write8(tgt_chunks[i].source_start, f);
				1019	Write8(tgt_chunks[i].source_len, f);
				1020	Write8(offset, f);
				1021	Write8(tgt_chunks[i].source_uncompressed_len, f);
				1022	Write8(tgt_chunks[i].len, f);
				1023	Write4(tgt_chunks[i].level, f);
				1024	Write4(tgt_chunks[i].method, f);
				1025	Write4(tgt_chunks[i].windowBits, f);
				1026	Write4(tgt_chunks[i].memLevel, f);
				1027	Write4(tgt_chunks[i].strategy, f);
				1028	offset += patch_size[i];
				1029	break;
				1030
				1031	case CHUNK_RAW:
				1032	printf("chunk %3d: raw (%10d, %10d)\n", i,
				1033	tgt_chunks[i].start, tgt_chunks[i].len);
				1034	Write4(patch_size[i], f);
				1035	fwrite(patch_data[i], 1, patch_size[i], f);
				1036	break;
				1037	}
				1038	}
				1039
				1040	// Append each chunk's bsdiff patch, in order.
				1041
				1042	for (i = 0; i < num_tgt_chunks; ++i) {
				1043	if (tgt_chunks[i].type != CHUNK_RAW) {
				1044	fwrite(patch_data[i], 1, patch_size[i], f);
				1045	}
				1046	}
				1047
				1048	fclose(f);
				1049
				1050	return 0;
				1051	}