blob: 5416bc97dbd76df77fe79d559ff86dbdd100971f [file] [log] [blame]
Dees_Troy51a0e822012-09-05 15:24:24 -04001/* pigz.c -- parallel implementation of gzip
Dees_Troy3bde1232012-09-22 08:10:28 -04002 * Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 Mark Adler
3 * Version 2.2.5 28 Jul 2012 Mark Adler
Dees_Troy51a0e822012-09-05 15:24:24 -04004 */
5
6/*
7 This software is provided 'as-is', without any express or implied
8 warranty. In no event will the author be held liable for any damages
9 arising from the use of this software.
10
11 Permission is granted to anyone to use this software for any purpose,
12 including commercial applications, and to alter it and redistribute it
13 freely, subject to the following restrictions:
14
15 1. The origin of this software must not be misrepresented; you must not
16 claim that you wrote the original software. If you use this software
17 in a product, an acknowledgment in the product documentation would be
18 appreciated but is not required.
19 2. Altered source versions must be plainly marked as such, and must not be
20 misrepresented as being the original software.
21 3. This notice may not be removed or altered from any source distribution.
22
23 Mark Adler
24 madler@alumni.caltech.edu
25
26 Mark accepts donations for providing this software. Donations are not
27 required or expected. Any amount that you feel is appropriate would be
28 appreciated. You can use this link:
29
30 https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=536055
31
32 */
33
34/* Version history:
35 1.0 17 Jan 2007 First version, pipe only
36 1.1 28 Jan 2007 Avoid void * arithmetic (some compilers don't get that)
37 Add note about requiring zlib 1.2.3
38 Allow compression level 0 (no compression)
39 Completely rewrite parallelism -- add a write thread
40 Use deflateSetDictionary() to make use of history
41 Tune argument defaults to best performance on four cores
42 1.2.1 1 Feb 2007 Add long command line options, add all gzip options
43 Add debugging options
44 1.2.2 19 Feb 2007 Add list (--list) function
45 Process file names on command line, write .gz output
46 Write name and time in gzip header, set output file time
47 Implement all command line options except --recursive
48 Add --keep option to prevent deleting input files
49 Add thread tracing information with -vv used
Dees_Troy3bde1232012-09-22 08:10:28 -040050 Copy crc32_combine() from zlib (shared libraries issue)
Dees_Troy51a0e822012-09-05 15:24:24 -040051 1.3 25 Feb 2007 Implement --recursive
52 Expand help to show all options
53 Show help if no arguments or output piping are provided
54 Process options in GZIP environment variable
55 Add progress indicator to write thread if --verbose
56 1.4 4 Mar 2007 Add --independent to facilitate damaged file recovery
57 Reallocate jobs for new --blocksize or --processes
58 Do not delete original if writing to stdout
59 Allow --processes 1, which does no threading
60 Add NOTHREAD define to compile without threads
61 Incorporate license text from zlib in source code
62 1.5 25 Mar 2007 Reinitialize jobs for new compression level
63 Copy attributes and owner from input file to output file
64 Add decompression and testing
65 Add -lt (or -ltv) to show all entries and proper lengths
66 Add decompression, testing, listing of LZW (.Z) files
67 Only generate and show trace log if DEBUG defined
68 Take "-" argument to mean read file from stdin
69 1.6 30 Mar 2007 Add zlib stream compression (--zlib), and decompression
70 1.7 29 Apr 2007 Decompress first entry of a zip file (if deflated)
71 Avoid empty deflate blocks at end of deflate stream
72 Show zlib check value (Adler-32) when listing
73 Don't complain when decompressing empty file
74 Warn about trailing junk for gzip and zlib streams
75 Make listings consistent, ignore gzip extra flags
76 Add zip stream compression (--zip)
77 1.8 13 May 2007 Document --zip option in help output
78 2.0 19 Oct 2008 Complete rewrite of thread usage and synchronization
79 Use polling threads and a pool of memory buffers
80 Remove direct pthread library use, hide in yarn.c
81 2.0.1 20 Oct 2008 Check version of zlib at compile time, need >= 1.2.3
82 2.1 24 Oct 2008 Decompress with read, write, inflate, and check threads
83 Remove spurious use of ctime_r(), ctime() more portable
84 Change application of job->calc lock to be a semaphore
85 Detect size of off_t at run time to select %lu vs. %llu
86 #define large file support macro even if not __linux__
87 Remove _LARGEFILE64_SOURCE, _FILE_OFFSET_BITS is enough
88 Detect file-too-large error and report, blame build
89 Replace check combination routines with those from zlib
90 2.1.1 28 Oct 2008 Fix a leak for files with an integer number of blocks
91 Update for yarn 1.1 (yarn_prefix and yarn_abort)
92 2.1.2 30 Oct 2008 Work around use of beta zlib in production systems
93 2.1.3 8 Nov 2008 Don't use zlib combination routines, put back in pigz
94 2.1.4 9 Nov 2008 Fix bug when decompressing very short files
95 2.1.5 20 Jul 2009 Added 2008, 2009 to --license statement
96 Allow numeric parameter immediately after -p or -b
97 Enforce parameter after -p, -b, -s, before other options
98 Enforce numeric parameters to have only numeric digits
99 Try to determine the number of processors for -p default
100 Fix --suffix short option to be -S to match gzip [Bloch]
101 Decompress if executable named "unpigz" [Amundsen]
102 Add a little bit of testing to Makefile
103 2.1.6 17 Jan 2010 Added pigz.spec to distribution for RPM systems [Brown]
104 Avoid some compiler warnings
105 Process symbolic links if piping to stdout [Hoffstätte]
106 Decompress if executable named "gunzip" [Hoffstätte]
107 Allow ".tgz" suffix [Chernookiy]
108 Fix adler32 comparison on .zz files
Dees_Troy3bde1232012-09-22 08:10:28 -0400109 2.1.7 17 Dec 2011 Avoid unused parameter warning in reenter()
110 Don't assume 2's complement ints in compress_thread()
111 Replicate gzip -cdf cat-like behavior
112 Replicate gzip -- option to suppress option decoding
113 Test output from make test instead of showing it
114 Updated pigz.spec to install unpigz, pigz.1 [Obermaier]
115 Add PIGZ environment variable [Mueller]
116 Replicate gzip suffix search when decoding or listing
117 Fix bug in load() to set in_left to zero on end of file
118 Do not check suffix when input file won't be modified
119 Decompress to stdout if name is "*cat" [Hayasaka]
120 Write data descriptor signature to be like Info-ZIP
121 Update and sort options list in help
122 Use CC variable for compiler in Makefile
123 Exit with code 2 if a warning has been issued
124 Fix thread synchronization problem when tracing
125 Change macro name MAX to MAX2 to avoid library conflicts
126 Determine number of processors on HP-UX [Lloyd]
127 2.2 31 Dec 2011 Check for expansion bound busting (e.g. modified zlib)
128 Make the "threads" list head global variable volatile
129 Fix construction and printing of 32-bit check values
130 Add --rsyncable functionality
131 2.2.1 1 Jan 2012 Fix bug in --rsyncable buffer management
132 2.2.2 1 Jan 2012 Fix another bug in --rsyncable buffer management
133 2.2.3 15 Jan 2012 Remove volatile in yarn.c
134 Reduce the number of input buffers
135 Change initial rsyncable hash to comparison value
136 Improve the efficiency of arriving at a byte boundary
137 Add thread portability #defines from yarn.c
138 Have rsyncable compression be independent of threading
139 Fix bug where constructed dictionaries not being used
140 2.2.4 11 Mar 2012 Avoid some return value warnings
141 Improve the portability of printing the off_t type
142 Check for existence of compress binary before using
143 Update zlib version checking to 1.2.6 for new functions
144 Fix bug in zip (-K) output
145 Fix license in pigz.spec
146 Remove thread portability #defines in pigz.c
147 2.2.5 28 Jul 2012 Avoid race condition in free_pool()
148 Change suffix to .tar when decompressing or listing .tgz
149 Print name of executable in error messages
150 Show help properly when the name is unpigz or gunzip
151 Fix permissions security problem before output is closed
Dees_Troy51a0e822012-09-05 15:24:24 -0400152 */
153
Dees_Troy3bde1232012-09-22 08:10:28 -0400154#define VERSION "pigz 2.2.5\n"
Dees_Troy51a0e822012-09-05 15:24:24 -0400155
156/* To-do:
Dees_Troy51a0e822012-09-05 15:24:24 -0400157 - make source portable for Windows, VMS, etc. (see gzip source code)
158 - make build portable (currently good for Unixish)
Dees_Troy51a0e822012-09-05 15:24:24 -0400159 */
160
161/*
162 pigz compresses using threads to make use of multiple processors and cores.
163 The input is broken up into 128 KB chunks with each compressed in parallel.
164 The individual check value for each chunk is also calculated in parallel.
165 The compressed data is written in order to the output, and a combined check
166 value is calculated from the individual check values.
167
168 The compressed data format generated is in the gzip, zlib, or single-entry
169 zip format using the deflate compression method. The compression produces
170 partial raw deflate streams which are concatenated by a single write thread
171 and wrapped with the appropriate header and trailer, where the trailer
172 contains the combined check value.
173
174 Each partial raw deflate stream is terminated by an empty stored block
175 (using the Z_SYNC_FLUSH option of zlib), in order to end that partial bit
Dees_Troy3bde1232012-09-22 08:10:28 -0400176 stream at a byte boundary, unless that partial stream happens to already end
177 at a byte boundary (the latter requires zlib 1.2.6 or later). Ending on a
178 byte boundary allows the partial streams to be concatenated simply as
179 sequences of bytes. This adds a very small four to five byte overhead
180 (average 3.75 bytes) to the output for each input chunk.
Dees_Troy51a0e822012-09-05 15:24:24 -0400181
182 The default input block size is 128K, but can be changed with the -b option.
183 The number of compress threads is set by default to 8, which can be changed
184 using the -p option. Specifying -p 1 avoids the use of threads entirely.
185 pigz will try to determine the number of processors in the machine, in which
186 case if that number is two or greater, pigz will use that as the default for
187 -p instead of 8.
188
189 The input blocks, while compressed independently, have the last 32K of the
190 previous block loaded as a preset dictionary to preserve the compression
191 effectiveness of deflating in a single thread. This can be turned off using
192 the --independent or -i option, so that the blocks can be decompressed
193 independently for partial error recovery or for random access.
194
195 Decompression can't be parallelized, at least not without specially prepared
196 deflate streams for that purpose. As a result, pigz uses a single thread
197 (the main thread) for decompression, but will create three other threads for
198 reading, writing, and check calculation, which can speed up decompression
199 under some circumstances. Parallel decompression can be turned off by
200 specifying one process (-dp 1 or -tp 1).
201
202 pigz requires zlib 1.2.1 or later to allow setting the dictionary when doing
203 raw deflate. Since zlib 1.2.3 corrects security vulnerabilities in zlib
204 version 1.2.1 and 1.2.2, conditionals check for zlib 1.2.3 or later during
205 the compilation of pigz.c. zlib 1.2.4 includes some improvements to
206 Z_FULL_FLUSH and deflateSetDictionary() that permit identical output for
207 pigz with and without threads, which is not possible with zlib 1.2.3. This
208 may be important for uses of pigz -R where small changes in the contents
209 should result in small changes in the archive for rsync. Note that due to
210 the details of how the lower levels of compression result in greater speed,
211 compression level 3 and below does not permit identical pigz output with
212 and without threads.
213
214 pigz uses the POSIX pthread library for thread control and communication,
215 through the yarn.h interface to yarn.c. yarn.c can be replaced with
216 equivalent implementations using other thread libraries. pigz can be
217 compiled with NOTHREAD #defined to not use threads at all (in which case
218 pigz will not be able to live up to the "parallel" in its name).
219 */
220
221/*
222 Details of parallel compression implementation:
223
224 When doing parallel compression, pigz uses the main thread to read the input
225 in 'size' sized chunks (see -b), and puts those in a compression job list,
226 each with a sequence number to keep track of the ordering. If it is not the
227 first chunk, then that job also points to the previous input buffer, from
228 which the last 32K will be used as a dictionary (unless -i is specified).
229 This sets a lower limit of 32K on 'size'.
230
231 pigz launches up to 'procs' compression threads (see -p). Each compression
232 thread continues to look for jobs in the compression list and perform those
233 jobs until instructed to return. When a job is pulled, the dictionary, if
234 provided, will be loaded into the deflate engine and then that input buffer
235 is dropped for reuse. Then the input data is compressed into an output
Dees_Troy3bde1232012-09-22 08:10:28 -0400236 buffer that grows in size if necessary to hold the compressed data. The job
237 is then put into the write job list, sorted by the sequence number. The
238 compress thread however continues to calculate the check value on the input
239 data, either a CRC-32 or Adler-32, possibly in parallel with the write
Dees_Troy51a0e822012-09-05 15:24:24 -0400240 thread writing the output data. Once that's done, the compress thread drops
241 the input buffer and also releases the lock on the check value so that the
242 write thread can combine it with the previous check values. The compress
243 thread has then completed that job, and goes to look for another.
244
245 All of the compress threads are left running and waiting even after the last
246 chunk is processed, so that they can support the next input to be compressed
247 (more than one input file on the command line). Once pigz is done, it will
248 call all the compress threads home (that'll do pig, that'll do).
249
250 Before starting to read the input, the main thread launches the write thread
251 so that it is ready pick up jobs immediately. The compress thread puts the
252 write jobs in the list in sequence sorted order, so that the first job in
253 the list is always has the lowest sequence number. The write thread waits
254 for the next write job in sequence, and then gets that job. The job still
255 holds its input buffer, from which the write thread gets the input buffer
256 length for use in check value combination. Then the write thread drops that
257 input buffer to allow its reuse. Holding on to the input buffer until the
258 write thread starts also has the benefit that the read and compress threads
259 can't get way ahead of the write thread and build up a large backlog of
260 unwritten compressed data. The write thread will write the compressed data,
261 drop the output buffer, and then wait for the check value to be unlocked
262 by the compress thread. Then the write thread combines the check value for
263 this chunk with the total check value for eventual use in the trailer. If
264 this is not the last chunk, the write thread then goes back to look for the
265 next output chunk in sequence. After the last chunk, the write thread
266 returns and joins the main thread. Unlike the compress threads, a new write
267 thread is launched for each input stream. The write thread writes the
268 appropriate header and trailer around the compressed data.
269
270 The input and output buffers are reused through their collection in pools.
271 Each buffer has a use count, which when decremented to zero returns the
272 buffer to the respective pool. Each input buffer has up to three parallel
273 uses: as the input for compression, as the data for the check value
274 calculation, and as a dictionary for compression. Each output buffer has
275 only one use, which is as the output of compression followed serially as
276 data to be written. The input pool is limited in the number of buffers, so
277 that reading does not get way ahead of compression and eat up memory with
278 more input than can be used. The limit is approximately two times the
279 number of compression threads. In the case that reading is fast as compared
280 to compression, that number allows a second set of buffers to be read while
281 the first set of compressions are being performed. The number of output
282 buffers is not directly limited, but is indirectly limited by the release of
Dees_Troy3bde1232012-09-22 08:10:28 -0400283 input buffers to about the same number.
Dees_Troy51a0e822012-09-05 15:24:24 -0400284 */
285
286/* use large file functions if available */
287#define _FILE_OFFSET_BITS 64
288
289/* included headers and what is expected from each */
290#include <stdio.h> /* fflush(), fprintf(), fputs(), getchar(), putc(), */
291 /* puts(), printf(), vasprintf(), stderr, EOF, NULL,
292 SEEK_END, size_t, off_t */
293#include <stdlib.h> /* exit(), malloc(), free(), realloc(), atol(), */
294 /* atoi(), getenv() */
295#include <stdarg.h> /* va_start(), va_end(), va_list */
296#include <string.h> /* memset(), memchr(), memcpy(), strcmp(), strcpy() */
297 /* strncpy(), strlen(), strcat(), strrchr() */
298#include <errno.h> /* errno, EEXIST */
299#include <assert.h> /* assert() */
300#include <time.h> /* ctime(), time(), time_t, mktime() */
301#include <signal.h> /* signal(), SIGINT */
302#include <sys/types.h> /* ssize_t */
303#include <sys/stat.h> /* chmod(), stat(), fstat(), lstat(), struct stat, */
304 /* S_IFDIR, S_IFLNK, S_IFMT, S_IFREG */
305#include <sys/time.h> /* utimes(), gettimeofday(), struct timeval */
306#include <unistd.h> /* unlink(), _exit(), read(), write(), close(), */
307 /* lseek(), isatty(), chown() */
308#include <fcntl.h> /* open(), O_CREAT, O_EXCL, O_RDONLY, O_TRUNC, */
309 /* O_WRONLY */
310#include <dirent.h> /* opendir(), readdir(), closedir(), DIR, */
311 /* struct dirent */
Dees_Troy3bde1232012-09-22 08:10:28 -0400312#include <limits.h> /* PATH_MAX, UINT_MAX */
313#if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3
314# include <inttypes.h> /* intmax_t */
315#endif
316
317#ifdef __hpux
318# include <sys/param.h>
319# include <sys/pstat.h>
320#endif
Dees_Troy51a0e822012-09-05 15:24:24 -0400321
322#include "zlib.h" /* deflateInit2(), deflateReset(), deflate(), */
323 /* deflateEnd(), deflateSetDictionary(), crc32(),
324 inflateBackInit(), inflateBack(), inflateBackEnd(),
325 Z_DEFAULT_COMPRESSION, Z_DEFAULT_STRATEGY,
326 Z_DEFLATED, Z_NO_FLUSH, Z_NULL, Z_OK,
327 Z_SYNC_FLUSH, z_stream */
328#if !defined(ZLIB_VERNUM) || ZLIB_VERNUM < 0x1230
329# error Need zlib version 1.2.3 or later
330#endif
331
332#ifndef NOTHREAD
333# include "yarn.h" /* thread, launch(), join(), join_all(), */
334 /* lock, new_lock(), possess(), twist(), wait_for(),
335 release(), peek_lock(), free_lock(), yarn_name */
336#endif
337
338/* for local functions and globals */
339#define local static
340
341/* prevent end-of-line conversions on MSDOSish operating systems */
342#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
343# include <io.h> /* setmode(), O_BINARY */
344# define SET_BINARY_MODE(fd) setmode(fd, O_BINARY)
345#else
346# define SET_BINARY_MODE(fd)
347#endif
348
349/* release an allocated pointer, if allocated, and mark as unallocated */
350#define RELEASE(ptr) \
351 do { \
352 if ((ptr) != NULL) { \
353 free(ptr); \
354 ptr = NULL; \
355 } \
356 } while (0)
357
Dees_Troy3bde1232012-09-22 08:10:28 -0400358/* sliding dictionary size for deflate */
359#define DICT 32768U
360
361/* largest power of 2 that fits in an unsigned int -- used to limit requests
362 to zlib functions that use unsigned int lengths */
363#define MAXP2 (UINT_MAX - (UINT_MAX >> 1))
364
365/* rsyncable constants -- RSYNCBITS is the number of bits in the mask for
366 comparison. For random input data, there will be a hit on average every
367 1<<RSYNCBITS bytes. So for an RSYNCBITS of 12, there will be an average of
368 one hit every 4096 bytes, resulting in a mean block size of 4096. RSYNCMASK
369 is the resulting bit mask. RSYNCHIT is what the hash value is compared to
370 after applying the mask.
371
372 The choice of 12 for RSYNCBITS is consistent with the original rsyncable
373 patch for gzip which also uses a 12-bit mask. This results in a relatively
374 small hit to compression, on the order of 1.5% to 3%. A mask of 13 bits can
375 be used instead if a hit of less than 1% to the compression is desired, at
376 the expense of more blocks transmitted for rsync updates. (Your mileage may
377 vary.)
378
379 This implementation of rsyncable uses a different hash algorithm than what
380 the gzip rsyncable patch uses in order to provide better performance in
381 several regards. The algorithm is simply to shift the hash value left one
382 bit and exclusive-or that with the next byte. This is masked to the number
383 of hash bits (RSYNCMASK) and compared to all ones except for a zero in the
384 top bit (RSYNCHIT). This rolling hash has a very small window of 19 bytes
385 (RSYNCBITS+7). The small window provides the benefit of much more rapid
386 resynchronization after a change, than does the 4096-byte window of the gzip
387 rsyncable patch.
388
389 The comparison value is chosen to avoid matching any repeated bytes or short
390 sequences. The gzip rsyncable patch on the other hand uses a sum and zero
391 for comparison, which results in certain bad behaviors, such as always
392 matching everywhere in a long sequence of zeros. Such sequences occur
393 frequently in tar files.
394
395 This hash efficiently discards history older than 19 bytes simply by
396 shifting that data past the top of the mask -- no history needs to be
397 retained to undo its impact on the hash value, as is needed for a sum.
398
399 The choice of the comparison value (RSYNCHIT) has the virtue of avoiding
400 extremely short blocks. The shortest block is five bytes (RSYNCBITS-7) from
401 hit to hit, and is unlikely. Whereas with the gzip rsyncable algorithm,
402 blocks of one byte are not only possible, but in fact are the most likely
403 block size.
404
405 Thanks and acknowledgement to Kevin Day for his experimentation and insights
406 on rsyncable hash characteristics that led to some of the choices here.
407 */
408#define RSYNCBITS 12
409#define RSYNCMASK ((1U << RSYNCBITS) - 1)
410#define RSYNCHIT (RSYNCMASK >> 1)
411
412/* initial pool counts and sizes -- INBUFS is the limit on the number of input
413 spaces as a function of the number of processors (used to throttle the
414 creation of compression jobs), OUTPOOL is the initial size of the output
415 data buffer, chosen to make resizing of the buffer very unlikely */
416#define INBUFS(p) (((p)<<1)+3)
417#define OUTPOOL(s) ((s)+((s)>>4))
418
Dees_Troy51a0e822012-09-05 15:24:24 -0400419/* globals (modified by main thread only when it's the only thread) */
Dees_Troy3bde1232012-09-22 08:10:28 -0400420local char *prog; /* name by which pigz was invoked */
Dees_Troy51a0e822012-09-05 15:24:24 -0400421local int ind; /* input file descriptor */
422local int outd; /* output file descriptor */
423local char in[PATH_MAX+1]; /* input file name (accommodate recursion) */
424local char *out = NULL; /* output file name (allocated if not NULL) */
425local int verbosity; /* 0 = quiet, 1 = normal, 2 = verbose, 3 = trace */
426local int headis; /* 1 to store name, 2 to store date, 3 both */
427local int pipeout; /* write output to stdout even if file */
428local int keep; /* true to prevent deletion of input file */
Dees_Troy3bde1232012-09-22 08:10:28 -0400429local int force; /* true to overwrite, compress links, cat */
Dees_Troy51a0e822012-09-05 15:24:24 -0400430local int form; /* gzip = 0, zlib = 1, zip = 2 or 3 */
Dees_Troy3bde1232012-09-22 08:10:28 -0400431local unsigned char magic1; /* first byte of possible header when decoding */
Dees_Troy51a0e822012-09-05 15:24:24 -0400432local int recurse; /* true to dive down into directory structure */
433local char *sufx; /* suffix to use (".gz" or user supplied) */
434local char *name; /* name for gzip header */
435local time_t mtime; /* time stamp from input file for gzip header */
436local int list; /* true to list files instead of compress */
437local int first = 1; /* true if we need to print listing header */
438local int decode; /* 0 to compress, 1 to decompress, 2 to test */
439local int level; /* compression level */
440local int rsync; /* true for rsync blocking */
441local int procs; /* maximum number of compression threads (>= 1) */
Dees_Troy3bde1232012-09-22 08:10:28 -0400442local int setdict; /* true to initialize dictionary in each thread */
Dees_Troy51a0e822012-09-05 15:24:24 -0400443local size_t size; /* uncompressed input size per thread (>= 32K) */
Dees_Troy3bde1232012-09-22 08:10:28 -0400444local int warned = 0; /* true if a warning has been given */
Dees_Troy51a0e822012-09-05 15:24:24 -0400445
446/* saved gzip/zip header data for decompression, testing, and listing */
447local time_t stamp; /* time stamp from gzip header */
448local char *hname = NULL; /* name from header (allocated) */
449local unsigned long zip_crc; /* local header crc */
450local unsigned long zip_clen; /* local header compressed length */
451local unsigned long zip_ulen; /* local header uncompressed length */
452
Dees_Troy3bde1232012-09-22 08:10:28 -0400453/* display a complaint with the program name on stderr */
454local int complain(char *fmt, ...)
455{
456 va_list ap;
457
458 if (verbosity > 0) {
459 fprintf(stderr, "%s: ", prog);
460 va_start(ap, fmt);
461 vfprintf(stderr, fmt, ap);
462 va_end(ap);
463 putc('\n', stderr);
464 fflush(stderr);
465 warned = 1;
466 }
467 return 0;
468}
469
Dees_Troy51a0e822012-09-05 15:24:24 -0400470/* exit with error, delete output file if in the middle of writing it */
471local int bail(char *why, char *what)
472{
473 if (outd != -1 && out != NULL)
474 unlink(out);
Dees_Troy3bde1232012-09-22 08:10:28 -0400475 complain("abort: %s%s", why, what);
Dees_Troy51a0e822012-09-05 15:24:24 -0400476 exit(1);
477 return 0;
478}
479
480#ifdef DEBUG
481
482/* starting time of day for tracing */
483local struct timeval start;
484
485/* trace log */
486local struct log {
487 struct timeval when; /* time of entry */
488 char *msg; /* message */
489 struct log *next; /* next entry */
490} *log_head, **log_tail = NULL;
491#ifndef NOTHREAD
492 local lock *log_lock = NULL;
493#endif
494
495/* maximum log entry length */
496#define MAXMSG 256
497
498/* set up log (call from main thread before other threads launched) */
499local void log_init(void)
500{
501 if (log_tail == NULL) {
502#ifndef NOTHREAD
503 log_lock = new_lock(0);
504#endif
505 log_head = NULL;
506 log_tail = &log_head;
507 }
508}
509
510/* add entry to trace log */
511local void log_add(char *fmt, ...)
512{
513 struct timeval now;
514 struct log *me;
515 va_list ap;
516 char msg[MAXMSG];
517
518 gettimeofday(&now, NULL);
519 me = malloc(sizeof(struct log));
520 if (me == NULL)
521 bail("not enough memory", "");
522 me->when = now;
523 va_start(ap, fmt);
524 vsnprintf(msg, MAXMSG, fmt, ap);
525 va_end(ap);
526 me->msg = malloc(strlen(msg) + 1);
527 if (me->msg == NULL) {
528 free(me);
529 bail("not enough memory", "");
530 }
531 strcpy(me->msg, msg);
532 me->next = NULL;
533#ifndef NOTHREAD
534 assert(log_lock != NULL);
535 possess(log_lock);
536#endif
537 *log_tail = me;
538 log_tail = &(me->next);
539#ifndef NOTHREAD
540 twist(log_lock, BY, +1);
541#endif
542}
543
544/* pull entry from trace log and print it, return false if empty */
545local int log_show(void)
546{
547 struct log *me;
548 struct timeval diff;
549
550 if (log_tail == NULL)
551 return 0;
552#ifndef NOTHREAD
553 possess(log_lock);
554#endif
555 me = log_head;
556 if (me == NULL) {
557#ifndef NOTHREAD
558 release(log_lock);
559#endif
560 return 0;
561 }
562 log_head = me->next;
563 if (me->next == NULL)
564 log_tail = &log_head;
565#ifndef NOTHREAD
566 twist(log_lock, BY, -1);
567#endif
568 diff.tv_usec = me->when.tv_usec - start.tv_usec;
569 diff.tv_sec = me->when.tv_sec - start.tv_sec;
570 if (diff.tv_usec < 0) {
571 diff.tv_usec += 1000000L;
572 diff.tv_sec--;
573 }
574 fprintf(stderr, "trace %ld.%06ld %s\n",
575 (long)diff.tv_sec, (long)diff.tv_usec, me->msg);
576 fflush(stderr);
577 free(me->msg);
578 free(me);
579 return 1;
580}
581
582/* release log resources (need to do log_init() to use again) */
583local void log_free(void)
584{
585 struct log *me;
586
587 if (log_tail != NULL) {
588#ifndef NOTHREAD
589 possess(log_lock);
590#endif
591 while ((me = log_head) != NULL) {
592 log_head = me->next;
593 free(me->msg);
594 free(me);
595 }
596#ifndef NOTHREAD
597 twist(log_lock, TO, 0);
598 free_lock(log_lock);
599 log_lock = NULL;
600#endif
601 log_tail = NULL;
602 }
603}
604
605/* show entries until no more, free log */
606local void log_dump(void)
607{
608 if (log_tail == NULL)
609 return;
610 while (log_show())
611 ;
612 log_free();
613}
614
615/* debugging macro */
616#define Trace(x) \
617 do { \
618 if (verbosity > 2) { \
619 log_add x; \
620 } \
621 } while (0)
622
623#else /* !DEBUG */
624
625#define log_dump()
626#define Trace(x)
627
628#endif
629
630/* read up to len bytes into buf, repeating read() calls as needed */
631local size_t readn(int desc, unsigned char *buf, size_t len)
632{
633 ssize_t ret;
634 size_t got;
635
636 got = 0;
637 while (len) {
638 ret = read(desc, buf, len);
639 if (ret < 0)
640 bail("read error on ", in);
641 if (ret == 0)
642 break;
643 buf += ret;
644 len -= ret;
645 got += ret;
646 }
647 return got;
648}
649
650/* write len bytes, repeating write() calls as needed */
651local void writen(int desc, unsigned char *buf, size_t len)
652{
653 ssize_t ret;
654
655 while (len) {
656 ret = write(desc, buf, len);
Dees_Troy3bde1232012-09-22 08:10:28 -0400657 if (ret < 1) {
658 complain("write error code %d", errno);
Dees_Troy51a0e822012-09-05 15:24:24 -0400659 bail("write error on ", out);
Dees_Troy3bde1232012-09-22 08:10:28 -0400660 }
Dees_Troy51a0e822012-09-05 15:24:24 -0400661 buf += ret;
662 len -= ret;
663 }
664}
665
Dees_Troy51a0e822012-09-05 15:24:24 -0400666/* convert Unix time to MS-DOS date and time, assuming current timezone
667 (you got a better idea?) */
668local unsigned long time2dos(time_t t)
669{
670 struct tm *tm;
671 unsigned long dos;
672
673 if (t == 0)
674 t = time(NULL);
675 tm = localtime(&t);
676 if (tm->tm_year < 80 || tm->tm_year > 207)
677 return 0;
678 dos = (tm->tm_year - 80) << 25;
679 dos += (tm->tm_mon + 1) << 21;
680 dos += tm->tm_mday << 16;
681 dos += tm->tm_hour << 11;
682 dos += tm->tm_min << 5;
683 dos += (tm->tm_sec + 1) >> 1; /* round to double-seconds */
684 return dos;
685}
686
687/* put a 4-byte integer into a byte array in LSB order or MSB order */
688#define PUT2L(a,b) (*(a)=(b)&0xff,(a)[1]=(b)>>8)
689#define PUT4L(a,b) (PUT2L(a,(b)&0xffff),PUT2L((a)+2,(b)>>16))
690#define PUT4M(a,b) (*(a)=(b)>>24,(a)[1]=(b)>>16,(a)[2]=(b)>>8,(a)[3]=(b))
691
692/* write a gzip, zlib, or zip header using the information in the globals */
693local unsigned long put_header(void)
694{
695 unsigned long len;
696 unsigned char head[30];
697
698 if (form > 1) { /* zip */
699 /* write local header */
700 PUT4L(head, 0x04034b50UL); /* local header signature */
701 PUT2L(head + 4, 20); /* version needed to extract (2.0) */
702 PUT2L(head + 6, 8); /* flags: data descriptor follows data */
703 PUT2L(head + 8, 8); /* deflate */
704 PUT4L(head + 10, time2dos(mtime));
705 PUT4L(head + 14, 0); /* crc (not here) */
706 PUT4L(head + 18, 0); /* compressed length (not here) */
707 PUT4L(head + 22, 0); /* uncompressed length (not here) */
708 PUT2L(head + 26, name == NULL ? 1 : strlen(name)); /* name length */
709 PUT2L(head + 28, 9); /* length of extra field (see below) */
710 writen(outd, head, 30); /* write local header */
711 len = 30;
712
713 /* write file name (use "-" for stdin) */
714 if (name == NULL)
715 writen(outd, (unsigned char *)"-", 1);
716 else
717 writen(outd, (unsigned char *)name, strlen(name));
718 len += name == NULL ? 1 : strlen(name);
719
720 /* write extended timestamp extra field block (9 bytes) */
721 PUT2L(head, 0x5455); /* extended timestamp signature */
722 PUT2L(head + 2, 5); /* number of data bytes in this block */
723 head[4] = 1; /* flag presence of mod time */
724 PUT4L(head + 5, mtime); /* mod time */
725 writen(outd, head, 9); /* write extra field block */
726 len += 9;
727 }
728 else if (form) { /* zlib */
729 head[0] = 0x78; /* deflate, 32K window */
730 head[1] = (level == 9 ? 3 : (level == 1 ? 0 :
731 (level >= 6 || level == Z_DEFAULT_COMPRESSION ? 1 : 2))) << 6;
732 head[1] += 31 - (((head[0] << 8) + head[1]) % 31);
733 writen(outd, head, 2);
734 len = 2;
735 }
736 else { /* gzip */
737 head[0] = 31;
738 head[1] = 139;
739 head[2] = 8; /* deflate */
740 head[3] = name != NULL ? 8 : 0;
741 PUT4L(head + 4, mtime);
742 head[8] = level == 9 ? 2 : (level == 1 ? 4 : 0);
743 head[9] = 3; /* unix */
744 writen(outd, head, 10);
745 len = 10;
746 if (name != NULL)
747 writen(outd, (unsigned char *)name, strlen(name) + 1);
748 if (name != NULL)
749 len += strlen(name) + 1;
750 }
751 return len;
752}
753
754/* write a gzip, zlib, or zip trailer */
755local void put_trailer(unsigned long ulen, unsigned long clen,
756 unsigned long check, unsigned long head)
757{
758 unsigned char tail[46];
759
760 if (form > 1) { /* zip */
761 unsigned long cent;
762
763 /* write data descriptor (as promised in local header) */
Dees_Troy3bde1232012-09-22 08:10:28 -0400764 PUT4L(tail, 0x08074b50UL);
765 PUT4L(tail + 4, check);
766 PUT4L(tail + 8, clen);
767 PUT4L(tail + 12, ulen);
768 writen(outd, tail, 16);
Dees_Troy51a0e822012-09-05 15:24:24 -0400769
770 /* write central file header */
771 PUT4L(tail, 0x02014b50UL); /* central header signature */
772 tail[4] = 63; /* obeyed version 6.3 of the zip spec */
773 tail[5] = 255; /* ignore external attributes */
774 PUT2L(tail + 6, 20); /* version needed to extract (2.0) */
775 PUT2L(tail + 8, 8); /* data descriptor is present */
776 PUT2L(tail + 10, 8); /* deflate */
777 PUT4L(tail + 12, time2dos(mtime));
778 PUT4L(tail + 16, check); /* crc */
779 PUT4L(tail + 20, clen); /* compressed length */
780 PUT4L(tail + 24, ulen); /* uncompressed length */
781 PUT2L(tail + 28, name == NULL ? 1 : strlen(name)); /* name length */
782 PUT2L(tail + 30, 9); /* length of extra field (see below) */
783 PUT2L(tail + 32, 0); /* no file comment */
784 PUT2L(tail + 34, 0); /* disk number 0 */
785 PUT2L(tail + 36, 0); /* internal file attributes */
786 PUT4L(tail + 38, 0); /* external file attributes (ignored) */
787 PUT4L(tail + 42, 0); /* offset of local header */
788 writen(outd, tail, 46); /* write central file header */
789 cent = 46;
790
791 /* write file name (use "-" for stdin) */
792 if (name == NULL)
793 writen(outd, (unsigned char *)"-", 1);
794 else
795 writen(outd, (unsigned char *)name, strlen(name));
796 cent += name == NULL ? 1 : strlen(name);
797
798 /* write extended timestamp extra field block (9 bytes) */
799 PUT2L(tail, 0x5455); /* extended timestamp signature */
800 PUT2L(tail + 2, 5); /* number of data bytes in this block */
801 tail[4] = 1; /* flag presence of mod time */
802 PUT4L(tail + 5, mtime); /* mod time */
803 writen(outd, tail, 9); /* write extra field block */
804 cent += 9;
805
806 /* write end of central directory record */
807 PUT4L(tail, 0x06054b50UL); /* end of central directory signature */
808 PUT2L(tail + 4, 0); /* number of this disk */
809 PUT2L(tail + 6, 0); /* disk with start of central directory */
810 PUT2L(tail + 8, 1); /* number of entries on this disk */
811 PUT2L(tail + 10, 1); /* total number of entries */
812 PUT4L(tail + 12, cent); /* size of central directory */
Dees_Troy3bde1232012-09-22 08:10:28 -0400813 PUT4L(tail + 16, head + clen + 16); /* offset of central directory */
Dees_Troy51a0e822012-09-05 15:24:24 -0400814 PUT2L(tail + 20, 0); /* no zip file comment */
815 writen(outd, tail, 22); /* write end of central directory record */
816 }
817 else if (form) { /* zlib */
818 PUT4M(tail, check);
819 writen(outd, tail, 4);
820 }
821 else { /* gzip */
822 PUT4L(tail, check);
823 PUT4L(tail + 4, ulen);
824 writen(outd, tail, 8);
825 }
826}
827
828/* compute check value depending on format */
829#define CHECK(a,b,c) (form == 1 ? adler32(a,b,c) : crc32(a,b,c))
830
831#ifndef NOTHREAD
832/* -- threaded portions of pigz -- */
833
834/* -- check value combination routines for parallel calculation -- */
835
836#define COMB(a,b,c) (form == 1 ? adler32_comb(a,b,c) : crc32_comb(a,b,c))
837/* combine two crc-32's or two adler-32's (copied from zlib 1.2.3 so that pigz
838 can be compatible with older versions of zlib) */
839
840/* we copy the combination routines from zlib here, in order to avoid
841 linkage issues with the zlib 1.2.3 builds on Sun, Ubuntu, and others */
842
843local unsigned long gf2_matrix_times(unsigned long *mat, unsigned long vec)
844{
845 unsigned long sum;
846
847 sum = 0;
848 while (vec) {
849 if (vec & 1)
850 sum ^= *mat;
851 vec >>= 1;
852 mat++;
853 }
854 return sum;
855}
856
857local void gf2_matrix_square(unsigned long *square, unsigned long *mat)
858{
859 int n;
860
861 for (n = 0; n < 32; n++)
862 square[n] = gf2_matrix_times(mat, mat[n]);
863}
864
865local unsigned long crc32_comb(unsigned long crc1, unsigned long crc2,
866 size_t len2)
867{
868 int n;
869 unsigned long row;
870 unsigned long even[32]; /* even-power-of-two zeros operator */
871 unsigned long odd[32]; /* odd-power-of-two zeros operator */
872
873 /* degenerate case */
874 if (len2 == 0)
875 return crc1;
876
877 /* put operator for one zero bit in odd */
878 odd[0] = 0xedb88320UL; /* CRC-32 polynomial */
879 row = 1;
880 for (n = 1; n < 32; n++) {
881 odd[n] = row;
882 row <<= 1;
883 }
884
885 /* put operator for two zero bits in even */
886 gf2_matrix_square(even, odd);
887
888 /* put operator for four zero bits in odd */
889 gf2_matrix_square(odd, even);
890
891 /* apply len2 zeros to crc1 (first square will put the operator for one
892 zero byte, eight zero bits, in even) */
893 do {
894 /* apply zeros operator for this bit of len2 */
895 gf2_matrix_square(even, odd);
896 if (len2 & 1)
897 crc1 = gf2_matrix_times(even, crc1);
898 len2 >>= 1;
899
900 /* if no more bits set, then done */
901 if (len2 == 0)
902 break;
903
904 /* another iteration of the loop with odd and even swapped */
905 gf2_matrix_square(odd, even);
906 if (len2 & 1)
907 crc1 = gf2_matrix_times(odd, crc1);
908 len2 >>= 1;
909
910 /* if no more bits set, then done */
911 } while (len2 != 0);
912
913 /* return combined crc */
914 crc1 ^= crc2;
915 return crc1;
916}
917
918#define BASE 65521U /* largest prime smaller than 65536 */
919#define LOW16 0xffff /* mask lower 16 bits */
920
921local unsigned long adler32_comb(unsigned long adler1, unsigned long adler2,
922 size_t len2)
923{
924 unsigned long sum1;
925 unsigned long sum2;
926 unsigned rem;
927
928 /* the derivation of this formula is left as an exercise for the reader */
929 rem = (unsigned)(len2 % BASE);
930 sum1 = adler1 & LOW16;
931 sum2 = (rem * sum1) % BASE;
932 sum1 += (adler2 & LOW16) + BASE - 1;
933 sum2 += ((adler1 >> 16) & LOW16) + ((adler2 >> 16) & LOW16) + BASE - rem;
934 if (sum1 >= BASE) sum1 -= BASE;
935 if (sum1 >= BASE) sum1 -= BASE;
936 if (sum2 >= (BASE << 1)) sum2 -= (BASE << 1);
937 if (sum2 >= BASE) sum2 -= BASE;
938 return sum1 | (sum2 << 16);
939}
940
941/* -- pool of spaces for buffer management -- */
942
943/* These routines manage a pool of spaces. Each pool specifies a fixed size
944 buffer to be contained in each space. Each space has a use count, which
945 when decremented to zero returns the space to the pool. If a space is
946 requested from the pool and the pool is empty, a space is immediately
947 created unless a specified limit on the number of spaces has been reached.
948 Only if the limit is reached will it wait for a space to be returned to the
949 pool. Each space knows what pool it belongs to, so that it can be returned.
950 */
951
952/* a space (one buffer for each space) */
953struct space {
954 lock *use; /* use count -- return to pool when zero */
Dees_Troy3bde1232012-09-22 08:10:28 -0400955 unsigned char *buf; /* buffer of size size */
956 size_t size; /* current size of this buffer */
957 size_t len; /* for application usage (initially zero) */
Dees_Troy51a0e822012-09-05 15:24:24 -0400958 struct pool *pool; /* pool to return to */
959 struct space *next; /* for pool linked list */
960};
961
Dees_Troy3bde1232012-09-22 08:10:28 -0400962/* pool of spaces (one pool for each type needed) */
Dees_Troy51a0e822012-09-05 15:24:24 -0400963struct pool {
964 lock *have; /* unused spaces available, lock for list */
965 struct space *head; /* linked list of available buffers */
Dees_Troy3bde1232012-09-22 08:10:28 -0400966 size_t size; /* size of new buffers in this pool */
Dees_Troy51a0e822012-09-05 15:24:24 -0400967 int limit; /* number of new spaces allowed, or -1 */
968 int made; /* number of buffers made */
969};
970
971/* initialize a pool (pool structure itself provided, not allocated) -- the
972 limit is the maximum number of spaces in the pool, or -1 to indicate no
973 limit, i.e., to never wait for a buffer to return to the pool */
974local void new_pool(struct pool *pool, size_t size, int limit)
975{
976 pool->have = new_lock(0);
977 pool->head = NULL;
978 pool->size = size;
979 pool->limit = limit;
980 pool->made = 0;
981}
982
983/* get a space from a pool -- the use count is initially set to one, so there
984 is no need to call use_space() for the first use */
985local struct space *get_space(struct pool *pool)
986{
987 struct space *space;
988
989 /* if can't create any more, wait for a space to show up */
990 possess(pool->have);
991 if (pool->limit == 0)
992 wait_for(pool->have, NOT_TO_BE, 0);
993
994 /* if a space is available, pull it from the list and return it */
995 if (pool->head != NULL) {
996 space = pool->head;
997 possess(space->use);
998 pool->head = space->next;
999 twist(pool->have, BY, -1); /* one less in pool */
1000 twist(space->use, TO, 1); /* initially one user */
Dees_Troy3bde1232012-09-22 08:10:28 -04001001 space->len = 0;
Dees_Troy51a0e822012-09-05 15:24:24 -04001002 return space;
1003 }
1004
1005 /* nothing available, don't want to wait, make a new space */
1006 assert(pool->limit != 0);
1007 if (pool->limit > 0)
1008 pool->limit--;
1009 pool->made++;
1010 release(pool->have);
1011 space = malloc(sizeof(struct space));
1012 if (space == NULL)
1013 bail("not enough memory", "");
1014 space->use = new_lock(1); /* initially one user */
1015 space->buf = malloc(pool->size);
1016 if (space->buf == NULL)
1017 bail("not enough memory", "");
Dees_Troy3bde1232012-09-22 08:10:28 -04001018 space->size = pool->size;
1019 space->len = 0;
Dees_Troy51a0e822012-09-05 15:24:24 -04001020 space->pool = pool; /* remember the pool this belongs to */
1021 return space;
1022}
1023
Dees_Troy3bde1232012-09-22 08:10:28 -04001024/* compute next size up by multiplying by about 2**(1/3) and round to the next
1025 power of 2 if we're close (so three applications results in doubling) -- if
1026 small, go up to at least 16, if overflow, go to max size_t value */
1027local size_t grow(size_t size)
1028{
1029 size_t was, top;
1030 int shift;
1031
1032 was = size;
1033 size += size >> 2;
1034 top = size;
1035 for (shift = 0; top > 7; shift++)
1036 top >>= 1;
1037 if (top == 7)
1038 size = (size_t)1 << (shift + 3);
1039 if (size < 16)
1040 size = 16;
1041 if (size <= was)
1042 size = (size_t)0 - 1;
1043 return size;
1044}
1045
1046/* increase the size of the buffer in space */
1047local void grow_space(struct space *space)
1048{
1049 size_t more;
1050
1051 /* compute next size up */
1052 more = grow(space->size);
1053 if (more == space->size)
1054 bail("not enough memory", "");
1055
1056 /* reallocate the buffer */
1057 space->buf = realloc(space->buf, more);
1058 if (space->buf == NULL)
1059 bail("not enough memory", "");
1060 space->size = more;
1061}
1062
Dees_Troy51a0e822012-09-05 15:24:24 -04001063/* increment the use count to require one more drop before returning this space
1064 to the pool */
1065local void use_space(struct space *space)
1066{
1067 possess(space->use);
1068 twist(space->use, BY, +1);
1069}
1070
1071/* drop a space, returning it to the pool if the use count is zero */
1072local void drop_space(struct space *space)
1073{
1074 int use;
1075 struct pool *pool;
1076
1077 possess(space->use);
1078 use = peek_lock(space->use);
1079 assert(use != 0);
1080 if (use == 1) {
1081 pool = space->pool;
1082 possess(pool->have);
1083 space->next = pool->head;
1084 pool->head = space;
1085 twist(pool->have, BY, +1);
1086 }
1087 twist(space->use, BY, -1);
1088}
1089
1090/* free the memory and lock resources of a pool -- return number of spaces for
1091 debugging and resource usage measurement */
1092local int free_pool(struct pool *pool)
1093{
1094 int count;
1095 struct space *space;
1096
1097 possess(pool->have);
1098 count = 0;
1099 while ((space = pool->head) != NULL) {
1100 pool->head = space->next;
1101 free(space->buf);
1102 free_lock(space->use);
1103 free(space);
1104 count++;
1105 }
Dees_Troy3bde1232012-09-22 08:10:28 -04001106 assert(count == pool->made);
Dees_Troy51a0e822012-09-05 15:24:24 -04001107 release(pool->have);
1108 free_lock(pool->have);
Dees_Troy51a0e822012-09-05 15:24:24 -04001109 return count;
1110}
1111
1112/* input and output buffer pools */
1113local struct pool in_pool;
1114local struct pool out_pool;
Dees_Troy3bde1232012-09-22 08:10:28 -04001115local struct pool dict_pool;
1116local struct pool lens_pool;
Dees_Troy51a0e822012-09-05 15:24:24 -04001117
1118/* -- parallel compression -- */
1119
1120/* compress or write job (passed from compress list to write list) -- if seq is
1121 equal to -1, compress_thread is instructed to return; if more is false then
1122 this is the last chunk, which after writing tells write_thread to return */
1123struct job {
1124 long seq; /* sequence number */
1125 int more; /* true if this is not the last chunk */
1126 struct space *in; /* input data to compress */
1127 struct space *out; /* dictionary or resulting compressed data */
Dees_Troy3bde1232012-09-22 08:10:28 -04001128 struct space *lens; /* coded list of flush block lengths */
Dees_Troy51a0e822012-09-05 15:24:24 -04001129 unsigned long check; /* check value for input data */
1130 lock *calc; /* released when check calculation complete */
1131 struct job *next; /* next job in the list (either list) */
1132};
1133
1134/* list of compress jobs (with tail for appending to list) */
1135local lock *compress_have = NULL; /* number of compress jobs waiting */
1136local struct job *compress_head, **compress_tail;
1137
1138/* list of write jobs */
1139local lock *write_first; /* lowest sequence number in list */
1140local struct job *write_head;
1141
1142/* number of compression threads running */
1143local int cthreads = 0;
1144
1145/* write thread if running */
1146local thread *writeth = NULL;
1147
1148/* setup job lists (call from main thread) */
1149local void setup_jobs(void)
1150{
1151 /* set up only if not already set up*/
1152 if (compress_have != NULL)
1153 return;
1154
1155 /* allocate locks and initialize lists */
1156 compress_have = new_lock(0);
1157 compress_head = NULL;
1158 compress_tail = &compress_head;
1159 write_first = new_lock(-1);
1160 write_head = NULL;
1161
Dees_Troy3bde1232012-09-22 08:10:28 -04001162 /* initialize buffer pools (initial size for out_pool not critical, since
1163 buffers will be grown in size if needed -- initial size chosen to make
1164 this unlikely -- same for lens_pool) */
1165 new_pool(&in_pool, size, INBUFS(procs));
1166 new_pool(&out_pool, OUTPOOL(size), -1);
1167 new_pool(&dict_pool, DICT, -1);
1168 new_pool(&lens_pool, size >> (RSYNCBITS - 1), -1);
Dees_Troy51a0e822012-09-05 15:24:24 -04001169}
1170
1171/* command the compress threads to all return, then join them all (call from
1172 main thread), free all the thread-related resources */
1173local void finish_jobs(void)
1174{
1175 struct job job;
1176 int caught;
1177
1178 /* only do this once */
1179 if (compress_have == NULL)
1180 return;
1181
1182 /* command all of the extant compress threads to return */
1183 possess(compress_have);
1184 job.seq = -1;
1185 job.next = NULL;
1186 compress_head = &job;
1187 compress_tail = &(job.next);
1188 twist(compress_have, BY, +1); /* will wake them all up */
1189
1190 /* join all of the compress threads, verify they all came back */
1191 caught = join_all();
1192 Trace(("-- joined %d compress threads", caught));
1193 assert(caught == cthreads);
1194 cthreads = 0;
1195
1196 /* free the resources */
Dees_Troy3bde1232012-09-22 08:10:28 -04001197 caught = free_pool(&lens_pool);
1198 Trace(("-- freed %d block lengths buffers", caught));
1199 caught = free_pool(&dict_pool);
1200 Trace(("-- freed %d dictionary buffers", caught));
Dees_Troy51a0e822012-09-05 15:24:24 -04001201 caught = free_pool(&out_pool);
1202 Trace(("-- freed %d output buffers", caught));
1203 caught = free_pool(&in_pool);
1204 Trace(("-- freed %d input buffers", caught));
1205 free_lock(write_first);
1206 free_lock(compress_have);
1207 compress_have = NULL;
1208}
1209
Dees_Troy3bde1232012-09-22 08:10:28 -04001210/* compress all strm->avail_in bytes at strm->next_in to out->buf, updating
1211 out->len, grow the size of the buffer (out->size) if necessary -- respect
1212 the size limitations of the zlib stream data types (size_t may be larger
1213 than unsigned) */
1214local void deflate_engine(z_stream *strm, struct space *out, int flush)
1215{
1216 size_t room;
1217
1218 do {
1219 room = out->size - out->len;
1220 if (room == 0) {
1221 grow_space(out);
1222 room = out->size - out->len;
1223 }
1224 strm->next_out = out->buf + out->len;
1225 strm->avail_out = room < UINT_MAX ? (unsigned)room : UINT_MAX;
1226 (void)deflate(strm, flush);
1227 out->len = strm->next_out - out->buf;
1228 } while (strm->avail_out == 0);
1229 assert(strm->avail_in == 0);
1230}
1231
Dees_Troy51a0e822012-09-05 15:24:24 -04001232/* get the next compression job from the head of the list, compress and compute
1233 the check value on the input, and put a job in the write list with the
1234 results -- keep looking for more jobs, returning when a job is found with a
1235 sequence number of -1 (leave that job in the list for other incarnations to
1236 find) */
1237local void compress_thread(void *dummy)
1238{
Dees_Troy3bde1232012-09-22 08:10:28 -04001239 struct job *job; /* job pulled and working on */
1240 struct job *here, **prior; /* pointers for inserting in write list */
Dees_Troy51a0e822012-09-05 15:24:24 -04001241 unsigned long check; /* check value of input */
Dees_Troy3bde1232012-09-22 08:10:28 -04001242 unsigned char *next; /* pointer for blocks, check value data */
1243 size_t left; /* input left to process */
Dees_Troy51a0e822012-09-05 15:24:24 -04001244 size_t len; /* remaining bytes to compress/check */
Dees_Troy3bde1232012-09-22 08:10:28 -04001245#if ZLIB_VERNUM >= 0x1260
1246 int bits; /* deflate pending bits */
1247#endif
Dees_Troy51a0e822012-09-05 15:24:24 -04001248 z_stream strm; /* deflate stream */
1249
1250 (void)dummy;
1251
1252 /* initialize the deflate stream for this thread */
1253 strm.zfree = Z_NULL;
1254 strm.zalloc = Z_NULL;
1255 strm.opaque = Z_NULL;
1256 if (deflateInit2(&strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) !=
1257 Z_OK)
1258 bail("not enough memory", "");
1259
1260 /* keep looking for work */
1261 for (;;) {
1262 /* get a job (like I tell my son) */
1263 possess(compress_have);
1264 wait_for(compress_have, NOT_TO_BE, 0);
1265 job = compress_head;
1266 assert(job != NULL);
1267 if (job->seq == -1)
1268 break;
1269 compress_head = job->next;
1270 if (job->next == NULL)
1271 compress_tail = &compress_head;
1272 twist(compress_have, BY, -1);
1273
1274 /* got a job -- initialize and set the compression level (note that if
1275 deflateParams() is called immediately after deflateReset(), there is
1276 no need to initialize the input/output for the stream) */
1277 Trace(("-- compressing #%ld", job->seq));
1278 (void)deflateReset(&strm);
1279 (void)deflateParams(&strm, level, Z_DEFAULT_STRATEGY);
1280
Dees_Troy3bde1232012-09-22 08:10:28 -04001281 /* set dictionary if provided, release that input or dictionary buffer
1282 (not NULL if dict is true and if this is not the first work unit) */
Dees_Troy51a0e822012-09-05 15:24:24 -04001283 if (job->out != NULL) {
1284 len = job->out->len;
Dees_Troy3bde1232012-09-22 08:10:28 -04001285 left = len < DICT ? len : DICT;
1286 deflateSetDictionary(&strm, job->out->buf + (len - left), left);
Dees_Troy51a0e822012-09-05 15:24:24 -04001287 drop_space(job->out);
1288 }
1289
Dees_Troy3bde1232012-09-22 08:10:28 -04001290 /* set up input and output */
Dees_Troy51a0e822012-09-05 15:24:24 -04001291 job->out = get_space(&out_pool);
1292 strm.next_in = job->in->buf;
1293 strm.next_out = job->out->buf;
1294
Dees_Troy3bde1232012-09-22 08:10:28 -04001295 /* compress each block, either flushing or finishing */
1296 next = job->lens == NULL ? NULL : job->lens->buf;
1297 left = job->in->len;
1298 job->out->len = 0;
1299 do {
1300 /* decode next block length from blocks list */
1301 len = next == NULL ? 128 : *next++;
1302 if (len < 128) /* 64..32831 */
1303 len = (len << 8) + (*next++) + 64;
1304 else if (len == 128) /* end of list */
1305 len = left;
1306 else if (len < 192) /* 1..63 */
1307 len &= 0x3f;
1308 else { /* 32832..4227135 */
1309 len = ((len & 0x3f) << 16) + (*next++ << 8) + 32832U;
1310 len += *next++;
1311 }
1312 left -= len;
Dees_Troy51a0e822012-09-05 15:24:24 -04001313
Dees_Troy3bde1232012-09-22 08:10:28 -04001314 /* run MAXP2-sized amounts of input through deflate -- this loop is
1315 needed for those cases where the unsigned type is smaller than
1316 the size_t type, or when len is close to the limit of the size_t
1317 type */
1318 while (len > MAXP2) {
1319 strm.avail_in = MAXP2;
1320 deflate_engine(&strm, job->out, Z_NO_FLUSH);
1321 len -= MAXP2;
1322 }
1323
1324 /* run the last piece through deflate -- end on a byte boundary,
1325 using a sync marker if necessary, or finish the deflate stream
1326 if this is the last block */
1327 strm.avail_in = (unsigned)len;
1328 if (left || job->more) {
1329#if ZLIB_VERNUM >= 0x1260
1330 deflate_engine(&strm, job->out, Z_BLOCK);
1331
1332 /* add just enough empty blocks to get to a byte boundary */
1333 (void)deflatePending(&strm, Z_NULL, &bits);
1334 if (bits & 1)
1335 deflate_engine(&strm, job->out, Z_SYNC_FLUSH);
1336 else if (bits & 7) {
1337 do {
1338 bits = deflatePrime(&strm, 10, 2); /* static empty */
1339 assert(bits == Z_OK);
1340 (void)deflatePending(&strm, Z_NULL, &bits);
1341 } while (bits & 7);
1342 deflate_engine(&strm, job->out, Z_BLOCK);
1343 }
1344#else
1345 deflate_engine(&strm, job->out, Z_SYNC_FLUSH);
1346#endif
1347 }
1348 else
1349 deflate_engine(&strm, job->out, Z_FINISH);
1350 } while (left);
1351 if (job->lens != NULL) {
1352 drop_space(job->lens);
1353 job->lens = NULL;
1354 }
Dees_Troy51a0e822012-09-05 15:24:24 -04001355 Trace(("-- compressed #%ld%s", job->seq, job->more ? "" : " (last)"));
1356
1357 /* reserve input buffer until check value has been calculated */
1358 use_space(job->in);
1359
1360 /* insert write job in list in sorted order, alert write thread */
1361 possess(write_first);
1362 prior = &write_head;
1363 while ((here = *prior) != NULL) {
1364 if (here->seq > job->seq)
1365 break;
1366 prior = &(here->next);
1367 }
1368 job->next = here;
1369 *prior = job;
1370 twist(write_first, TO, write_head->seq);
1371
1372 /* calculate the check value in parallel with writing, alert the write
1373 thread that the calculation is complete, and drop this usage of the
1374 input buffer */
1375 len = job->in->len;
1376 next = job->in->buf;
1377 check = CHECK(0L, Z_NULL, 0);
Dees_Troy3bde1232012-09-22 08:10:28 -04001378 while (len > MAXP2) {
1379 check = CHECK(check, next, MAXP2);
1380 len -= MAXP2;
1381 next += MAXP2;
Dees_Troy51a0e822012-09-05 15:24:24 -04001382 }
1383 check = CHECK(check, next, (unsigned)len);
1384 drop_space(job->in);
1385 job->check = check;
Dees_Troy3bde1232012-09-22 08:10:28 -04001386 Trace(("-- checked #%ld%s", job->seq, job->more ? "" : " (last)"));
Dees_Troy51a0e822012-09-05 15:24:24 -04001387 possess(job->calc);
1388 twist(job->calc, TO, 1);
Dees_Troy51a0e822012-09-05 15:24:24 -04001389
1390 /* done with that one -- go find another job */
1391 }
1392
1393 /* found job with seq == -1 -- free deflate memory and return to join */
1394 release(compress_have);
Dees_Troy3bde1232012-09-22 08:10:28 -04001395 (void)deflateEnd(&strm);
Dees_Troy51a0e822012-09-05 15:24:24 -04001396}
1397
1398/* collect the write jobs off of the list in sequence order and write out the
1399 compressed data until the last chunk is written -- also write the header and
1400 trailer and combine the individual check values of the input buffers */
1401local void write_thread(void *dummy)
1402{
1403 long seq; /* next sequence number looking for */
1404 struct job *job; /* job pulled and working on */
1405 size_t len; /* input length */
1406 int more; /* true if more chunks to write */
1407 unsigned long head; /* header length */
1408 unsigned long ulen; /* total uncompressed size (overflow ok) */
1409 unsigned long clen; /* total compressed size (overflow ok) */
1410 unsigned long check; /* check value of uncompressed data */
1411
1412 (void)dummy;
1413
1414 /* build and write header */
1415 Trace(("-- write thread running"));
1416 head = put_header();
1417
Dees_Troy3bde1232012-09-22 08:10:28 -04001418 /* process output of compress threads until end of input */
Dees_Troy51a0e822012-09-05 15:24:24 -04001419 ulen = clen = 0;
1420 check = CHECK(0L, Z_NULL, 0);
1421 seq = 0;
1422 do {
1423 /* get next write job in order */
1424 possess(write_first);
1425 wait_for(write_first, TO_BE, seq);
1426 job = write_head;
1427 write_head = job->next;
1428 twist(write_first, TO, write_head == NULL ? -1 : write_head->seq);
1429
1430 /* update lengths, save uncompressed length for COMB */
1431 more = job->more;
1432 len = job->in->len;
1433 drop_space(job->in);
1434 ulen += (unsigned long)len;
1435 clen += (unsigned long)(job->out->len);
1436
1437 /* write the compressed data and drop the output buffer */
1438 Trace(("-- writing #%ld", seq));
1439 writen(outd, job->out->buf, job->out->len);
1440 drop_space(job->out);
1441 Trace(("-- wrote #%ld%s", seq, more ? "" : " (last)"));
1442
1443 /* wait for check calculation to complete, then combine, once
1444 the compress thread is done with the input, release it */
1445 possess(job->calc);
1446 wait_for(job->calc, TO_BE, 1);
1447 release(job->calc);
1448 check = COMB(check, job->check, len);
1449
1450 /* free the job */
1451 free_lock(job->calc);
1452 free(job);
1453
1454 /* get the next buffer in sequence */
1455 seq++;
1456 } while (more);
1457
1458 /* write trailer */
1459 put_trailer(ulen, clen, check, head);
1460
1461 /* verify no more jobs, prepare for next use */
1462 possess(compress_have);
1463 assert(compress_head == NULL && peek_lock(compress_have) == 0);
1464 release(compress_have);
1465 possess(write_first);
1466 assert(write_head == NULL);
1467 twist(write_first, TO, -1);
1468}
1469
Dees_Troy3bde1232012-09-22 08:10:28 -04001470/* encode a hash hit to the block lengths list -- hit == 0 ends the list */
1471local void append_len(struct job *job, size_t len)
1472{
1473 struct space *lens;
1474
1475 assert(len < 4227136UL);
1476 if (job->lens == NULL)
1477 job->lens = get_space(&lens_pool);
1478 lens = job->lens;
1479 if (lens->size < lens->len + 3)
1480 grow_space(lens);
1481 if (len < 64)
1482 lens->buf[lens->len++] = len + 128;
1483 else if (len < 32832U) {
1484 len -= 64;
1485 lens->buf[lens->len++] = len >> 8;
1486 lens->buf[lens->len++] = len;
1487 }
1488 else {
1489 len -= 32832U;
1490 lens->buf[lens->len++] = (len >> 16) + 192;
1491 lens->buf[lens->len++] = len >> 8;
1492 lens->buf[lens->len++] = len;
1493 }
1494}
1495
Dees_Troy51a0e822012-09-05 15:24:24 -04001496/* compress ind to outd, using multiple threads for the compression and check
1497 value calculations and one other thread for writing the output -- compress
1498 threads will be launched and left running (waiting actually) to support
1499 subsequent calls of parallel_compress() */
1500local void parallel_compress(void)
1501{
1502 long seq; /* sequence number */
Dees_Troy3bde1232012-09-22 08:10:28 -04001503 struct space *curr; /* input data to compress */
1504 struct space *next; /* input data that follows curr */
1505 struct space *hold; /* input data that follows next */
1506 struct space *dict; /* dictionary for next compression */
Dees_Troy51a0e822012-09-05 15:24:24 -04001507 struct job *job; /* job for compress, then write */
1508 int more; /* true if more input to read */
Dees_Troy3bde1232012-09-22 08:10:28 -04001509 unsigned hash; /* hash for rsyncable */
1510 unsigned char *scan; /* next byte to compute hash on */
1511 unsigned char *end; /* after end of data to compute hash on */
1512 unsigned char *last; /* position after last hit */
1513 size_t left; /* last hit in curr to end of curr */
1514 size_t len; /* for various length computations */
Dees_Troy51a0e822012-09-05 15:24:24 -04001515
1516 /* if first time or after an option change, setup the job lists */
1517 setup_jobs();
1518
1519 /* start write thread */
1520 writeth = launch(write_thread, NULL);
1521
1522 /* read from input and start compress threads (write thread will pick up
Dees_Troy3bde1232012-09-22 08:10:28 -04001523 the output of the compress threads) */
Dees_Troy51a0e822012-09-05 15:24:24 -04001524 seq = 0;
Dees_Troy51a0e822012-09-05 15:24:24 -04001525 next = get_space(&in_pool);
Dees_Troy3bde1232012-09-22 08:10:28 -04001526 next->len = readn(ind, next->buf, next->size);
1527 hold = NULL;
1528 dict = NULL;
1529 scan = next->buf;
1530 hash = RSYNCHIT;
1531 left = 0;
Dees_Troy51a0e822012-09-05 15:24:24 -04001532 do {
Dees_Troy3bde1232012-09-22 08:10:28 -04001533 /* create a new job */
Dees_Troy51a0e822012-09-05 15:24:24 -04001534 job = malloc(sizeof(struct job));
1535 if (job == NULL)
1536 bail("not enough memory", "");
Dees_Troy51a0e822012-09-05 15:24:24 -04001537 job->calc = new_lock(0);
1538
Dees_Troy3bde1232012-09-22 08:10:28 -04001539 /* update input spaces */
1540 curr = next;
1541 next = hold;
1542 hold = NULL;
1543
1544 /* get more input if we don't already have some */
1545 if (next == NULL) {
Dees_Troy51a0e822012-09-05 15:24:24 -04001546 next = get_space(&in_pool);
Dees_Troy3bde1232012-09-22 08:10:28 -04001547 next->len = readn(ind, next->buf, next->size);
1548 }
1549
1550 /* if rsyncable, generate block lengths and prepare curr for job to
1551 likely have less than size bytes (up to the last hash hit) */
1552 job->lens = NULL;
1553 if (rsync && curr->len) {
1554 /* compute the hash function starting where we last left off to
1555 cover either size bytes or to EOF, whichever is less, through
1556 the data in curr (and in the next loop, through next) -- save
1557 the block lengths resulting from the hash hits in the job->lens
1558 list */
1559 if (left == 0) {
1560 /* scan is in curr */
1561 last = curr->buf;
1562 end = curr->buf + curr->len;
1563 while (scan < end) {
1564 hash = ((hash << 1) ^ *scan++) & RSYNCMASK;
1565 if (hash == RSYNCHIT) {
1566 len = scan - last;
1567 append_len(job, len);
1568 last = scan;
1569 }
1570 }
1571
1572 /* continue scan in next */
1573 left = scan - last;
1574 scan = next->buf;
1575 }
1576
1577 /* scan in next for enough bytes to fill curr, or what is available
1578 in next, whichever is less (if next isn't full, then we're at
1579 the end of the file) -- the bytes in curr since the last hit,
1580 stored in left, counts towards the size of the first block */
1581 last = next->buf;
1582 len = curr->size - curr->len;
1583 if (len > next->len)
1584 len = next->len;
1585 end = next->buf + len;
1586 while (scan < end) {
1587 hash = ((hash << 1) ^ *scan++) & RSYNCMASK;
1588 if (hash == RSYNCHIT) {
1589 len = (scan - last) + left;
1590 left = 0;
1591 append_len(job, len);
1592 last = scan;
1593 }
1594 }
1595 append_len(job, 0);
1596
1597 /* create input in curr for job up to last hit or entire buffer if
1598 no hits at all -- save remainder in next and possibly hold */
1599 len = (job->lens->len == 1 ? scan : last) - next->buf;
1600 if (len) {
1601 /* got hits in next, or no hits in either -- copy to curr */
1602 memcpy(curr->buf + curr->len, next->buf, len);
1603 curr->len += len;
1604 memmove(next->buf, next->buf + len, next->len - len);
1605 next->len -= len;
1606 scan -= len;
1607 left = 0;
1608 }
1609 else if (job->lens->len != 1 && left && next->len) {
1610 /* had hits in curr, but none in next, and last hit in curr
1611 wasn't right at the end, so we have input there to save --
1612 use curr up to the last hit, save the rest, moving next to
1613 hold */
1614 hold = next;
1615 next = get_space(&in_pool);
1616 memcpy(next->buf, curr->buf + (curr->len - left), left);
1617 next->len = left;
1618 curr->len -= left;
1619 }
1620 else {
1621 /* else, last match happened to be right at the end of curr,
1622 or we're at the end of the input compressing the rest */
1623 left = 0;
Dees_Troy51a0e822012-09-05 15:24:24 -04001624 }
1625 }
Dees_Troy3bde1232012-09-22 08:10:28 -04001626
1627 /* compress curr->buf to curr->len -- compress thread will drop curr */
1628 job->in = curr;
1629
1630 /* set job->more if there is more to compress after curr */
1631 more = next->len != 0;
Dees_Troy51a0e822012-09-05 15:24:24 -04001632 job->more = more;
Dees_Troy3bde1232012-09-22 08:10:28 -04001633
1634 /* provide dictionary for this job, prepare dictionary for next job */
1635 job->out = dict;
1636 if (more && setdict) {
1637 if (curr->len >= DICT || job->out == NULL) {
1638 dict = curr;
1639 use_space(dict);
1640 }
1641 else {
1642 dict = get_space(&dict_pool);
1643 len = DICT - curr->len;
1644 memcpy(dict->buf, job->out->buf + (job->out->len - len), len);
1645 memcpy(dict->buf + len, curr->buf, curr->len);
1646 dict->len = DICT;
1647 }
1648 }
1649
1650 /* preparation of job is complete */
1651 job->seq = seq;
Dees_Troy51a0e822012-09-05 15:24:24 -04001652 Trace(("-- read #%ld%s", seq, more ? "" : " (last)"));
1653 if (++seq < 1)
1654 bail("input too long: ", in);
1655
1656 /* start another compress thread if needed */
1657 if (cthreads < seq && cthreads < procs) {
1658 (void)launch(compress_thread, NULL);
1659 cthreads++;
1660 }
1661
1662 /* put job at end of compress list, let all the compressors know */
1663 possess(compress_have);
1664 job->next = NULL;
1665 *compress_tail = job;
1666 compress_tail = &(job->next);
1667 twist(compress_have, BY, +1);
Dees_Troy51a0e822012-09-05 15:24:24 -04001668 } while (more);
Dees_Troy3bde1232012-09-22 08:10:28 -04001669 drop_space(next);
Dees_Troy51a0e822012-09-05 15:24:24 -04001670
1671 /* wait for the write thread to complete (we leave the compress threads out
1672 there and waiting in case there is another stream to compress) */
1673 join(writeth);
1674 writeth = NULL;
1675 Trace(("-- write thread joined"));
1676}
1677
1678#endif
1679
Dees_Troy3bde1232012-09-22 08:10:28 -04001680/* repeated code in single_compress to compress available input and write it */
1681#define DEFLATE_WRITE(flush) \
1682 do { \
1683 do { \
1684 strm->avail_out = out_size; \
1685 strm->next_out = out; \
1686 (void)deflate(strm, flush); \
1687 writen(outd, out, out_size - strm->avail_out); \
1688 clen += out_size - strm->avail_out; \
1689 } while (strm->avail_out == 0); \
1690 assert(strm->avail_in == 0); \
1691 } while (0)
1692
Dees_Troy51a0e822012-09-05 15:24:24 -04001693/* do a simple compression in a single thread from ind to outd -- if reset is
1694 true, instead free the memory that was allocated and retained for input,
1695 output, and deflate */
1696local void single_compress(int reset)
1697{
1698 size_t got; /* amount read */
1699 size_t more; /* amount of next read (0 if eof) */
Dees_Troy3bde1232012-09-22 08:10:28 -04001700 size_t start; /* start of next read */
1701 size_t block; /* bytes in current block for -i */
1702 unsigned hash; /* hash for rsyncable */
1703#if ZLIB_VERNUM >= 0x1260
1704 int bits; /* deflate pending bits */
1705#endif
1706 unsigned char *scan; /* pointer for hash computation */
1707 size_t left; /* bytes left to compress after hash hit */
Dees_Troy51a0e822012-09-05 15:24:24 -04001708 unsigned long head; /* header length */
1709 unsigned long ulen; /* total uncompressed size (overflow ok) */
1710 unsigned long clen; /* total compressed size (overflow ok) */
1711 unsigned long check; /* check value of uncompressed data */
1712 static unsigned out_size; /* size of output buffer */
1713 static unsigned char *in, *next, *out; /* reused i/o buffers */
1714 static z_stream *strm = NULL; /* reused deflate structure */
1715
1716 /* if requested, just release the allocations and return */
1717 if (reset) {
1718 if (strm != NULL) {
Dees_Troy3bde1232012-09-22 08:10:28 -04001719 (void)deflateEnd(strm);
Dees_Troy51a0e822012-09-05 15:24:24 -04001720 free(strm);
1721 free(out);
1722 free(next);
1723 free(in);
1724 strm = NULL;
1725 }
1726 return;
1727 }
1728
1729 /* initialize the deflate structure if this is the first time */
1730 if (strm == NULL) {
Dees_Troy3bde1232012-09-22 08:10:28 -04001731 out_size = size > MAXP2 ? MAXP2 : (unsigned)size;
Dees_Troy51a0e822012-09-05 15:24:24 -04001732 if ((in = malloc(size)) == NULL ||
1733 (next = malloc(size)) == NULL ||
1734 (out = malloc(out_size)) == NULL ||
1735 (strm = malloc(sizeof(z_stream))) == NULL)
1736 bail("not enough memory", "");
1737 strm->zfree = Z_NULL;
1738 strm->zalloc = Z_NULL;
1739 strm->opaque = Z_NULL;
1740 if (deflateInit2(strm, level, Z_DEFLATED, -15, 8,
1741 Z_DEFAULT_STRATEGY) != Z_OK)
1742 bail("not enough memory", "");
1743 }
1744
1745 /* write header */
1746 head = put_header();
1747
1748 /* set compression level in case it changed */
1749 (void)deflateReset(strm);
1750 (void)deflateParams(strm, level, Z_DEFAULT_STRATEGY);
1751
1752 /* do raw deflate and calculate check value */
Dees_Troy3bde1232012-09-22 08:10:28 -04001753 got = 0;
Dees_Troy51a0e822012-09-05 15:24:24 -04001754 more = readn(ind, next, size);
Dees_Troy3bde1232012-09-22 08:10:28 -04001755 ulen = (unsigned)more;
1756 start = 0;
1757 clen = 0;
1758 block = 0;
1759 check = CHECK(0L, Z_NULL, 0);
1760 hash = RSYNCHIT;
Dees_Troy51a0e822012-09-05 15:24:24 -04001761 do {
1762 /* get data to compress, see if there is any more input */
Dees_Troy3bde1232012-09-22 08:10:28 -04001763 if (got == 0) {
1764 scan = in; in = next; next = scan;
1765 strm->next_in = in + start;
1766 got = more;
1767 more = readn(ind, next, size);
1768 ulen += (unsigned long)more;
1769 start = 0;
Dees_Troy51a0e822012-09-05 15:24:24 -04001770 }
1771
Dees_Troy3bde1232012-09-22 08:10:28 -04001772 /* if rsyncable, compute hash until a hit or the end of the block */
1773 left = 0;
1774 if (rsync && got) {
1775 scan = strm->next_in;
1776 left = got;
1777 do {
1778 if (left == 0) {
1779 /* went to the end -- if no more or no hit in size bytes,
1780 then proceed to do a flush or finish with got bytes */
1781 if (more == 0 || got == size)
1782 break;
1783
1784 /* fill in[] with what's left there and as much as possible
1785 from next[] -- set up to continue hash hit search */
1786 memmove(in, strm->next_in, got);
1787 strm->next_in = in;
1788 scan = in + got;
1789 left = more > size - got ? size - got : more;
1790 memcpy(scan, next + start, left);
1791 got += left;
1792 more -= left;
1793 start += left;
1794
1795 /* if that emptied the next buffer, try to refill it */
1796 if (more == 0) {
1797 more = readn(ind, next, size);
1798 ulen += (unsigned long)more;
1799 start = 0;
1800 }
1801 }
1802 left--;
1803 hash = ((hash << 1) ^ *scan++) & RSYNCMASK;
1804 } while (hash != RSYNCHIT);
1805 got -= left;
1806 }
1807
1808 /* clear history for --independent option */
1809 if (!setdict) {
1810 block += got;
1811 if (block > size) {
1812 (void)deflateReset(strm);
1813 block = got;
1814 }
1815 }
1816
1817 /* compress MAXP2-size chunks in case unsigned type is small */
1818 while (got > MAXP2) {
1819 strm->avail_in = MAXP2;
1820 check = CHECK(check, strm->next_in, strm->avail_in);
1821 DEFLATE_WRITE(Z_NO_FLUSH);
1822 got -= MAXP2;
1823 }
1824
1825 /* compress the remainder, emit a block -- finish if end of input */
Dees_Troy51a0e822012-09-05 15:24:24 -04001826 strm->avail_in = (unsigned)got;
Dees_Troy3bde1232012-09-22 08:10:28 -04001827 got = left;
Dees_Troy51a0e822012-09-05 15:24:24 -04001828 check = CHECK(check, strm->next_in, strm->avail_in);
Dees_Troy3bde1232012-09-22 08:10:28 -04001829 if (more || got) {
1830#if ZLIB_VERNUM >= 0x1260
1831 DEFLATE_WRITE(Z_BLOCK);
1832 (void)deflatePending(strm, Z_NULL, &bits);
1833 if (bits & 1)
1834 DEFLATE_WRITE(Z_SYNC_FLUSH);
1835 else if (bits & 7) {
1836 do {
1837 bits = deflatePrime(strm, 10, 2);
1838 assert(bits == Z_OK);
1839 (void)deflatePending(strm, Z_NULL, &bits);
1840 } while (bits & 7);
1841 DEFLATE_WRITE(Z_NO_FLUSH);
1842 }
1843#else
1844 DEFLATE_WRITE(Z_SYNC_FLUSH);
1845#endif
1846 }
1847 else
1848 DEFLATE_WRITE(Z_FINISH);
Dees_Troy51a0e822012-09-05 15:24:24 -04001849
1850 /* do until no more input */
Dees_Troy3bde1232012-09-22 08:10:28 -04001851 } while (more || got);
Dees_Troy51a0e822012-09-05 15:24:24 -04001852
1853 /* write trailer */
1854 put_trailer(ulen, clen, check, head);
1855}
1856
1857/* --- decompression --- */
1858
1859/* globals for decompression and listing buffered reading */
1860#define BUF 32768U /* input buffer size */
1861local unsigned char in_buf[BUF]; /* input buffer */
1862local unsigned char *in_next; /* next unused byte in buffer */
1863local size_t in_left; /* number of unused bytes in buffer */
1864local int in_eof; /* true if reached end of file on input */
1865local int in_short; /* true if last read didn't fill buffer */
1866local off_t in_tot; /* total bytes read from input */
1867local off_t out_tot; /* total bytes written to output */
1868local unsigned long out_check; /* check value of output */
1869
1870#ifndef NOTHREAD
1871/* parallel reading */
1872
1873local unsigned char in_buf2[BUF]; /* second buffer for parallel reads */
1874local size_t in_len; /* data waiting in next buffer */
1875local int in_which; /* -1: start, 0: in_buf2, 1: in_buf */
1876local lock *load_state; /* value = 0 to wait, 1 to read a buffer */
1877local thread *load_thread; /* load_read() thread for joining */
1878
1879/* parallel read thread */
1880local void load_read(void *dummy)
1881{
1882 size_t len;
1883
1884 (void)dummy;
1885
1886 Trace(("-- launched decompress read thread"));
1887 do {
1888 possess(load_state);
1889 wait_for(load_state, TO_BE, 1);
1890 in_len = len = readn(ind, in_which ? in_buf : in_buf2, BUF);
1891 Trace(("-- decompress read thread read %lu bytes", len));
1892 twist(load_state, TO, 0);
1893 } while (len == BUF);
1894 Trace(("-- exited decompress read thread"));
1895}
1896
1897#endif
1898
Dees_Troy3bde1232012-09-22 08:10:28 -04001899/* load() is called when the input has been consumed in order to provide more
Dees_Troy51a0e822012-09-05 15:24:24 -04001900 input data: load the input buffer with BUF or less bytes (less if at end of
1901 file) from the file ind, set in_next to point to the in_left bytes read,
1902 update in_tot, and return in_left -- in_eof is set to true when in_left has
1903 gone to zero and there is no more data left to read from ind */
1904local size_t load(void)
1905{
1906 /* if already detected end of file, do nothing */
1907 if (in_short) {
1908 in_eof = 1;
Dees_Troy3bde1232012-09-22 08:10:28 -04001909 in_left = 0;
Dees_Troy51a0e822012-09-05 15:24:24 -04001910 return 0;
1911 }
1912
1913#ifndef NOTHREAD
1914 /* if first time in or procs == 1, read a buffer to have something to
1915 return, otherwise wait for the previous read job to complete */
1916 if (procs > 1) {
1917 /* if first time, fire up the read thread, ask for a read */
1918 if (in_which == -1) {
1919 in_which = 1;
1920 load_state = new_lock(1);
1921 load_thread = launch(load_read, NULL);
1922 }
1923
1924 /* wait for the previously requested read to complete */
1925 possess(load_state);
1926 wait_for(load_state, TO_BE, 0);
1927 release(load_state);
1928
1929 /* set up input buffer with the data just read */
1930 in_next = in_which ? in_buf : in_buf2;
1931 in_left = in_len;
1932
1933 /* if not at end of file, alert read thread to load next buffer,
1934 alternate between in_buf and in_buf2 */
1935 if (in_len == BUF) {
1936 in_which = 1 - in_which;
1937 possess(load_state);
1938 twist(load_state, TO, 1);
1939 }
1940
1941 /* at end of file -- join read thread (already exited), clean up */
1942 else {
1943 join(load_thread);
1944 free_lock(load_state);
1945 in_which = -1;
1946 }
1947 }
1948 else
1949#endif
1950 {
1951 /* don't use threads -- simply read a buffer into in_buf */
1952 in_left = readn(ind, in_next = in_buf, BUF);
1953 }
1954
1955 /* note end of file */
1956 if (in_left < BUF) {
1957 in_short = 1;
1958
1959 /* if we got bupkis, now is the time to mark eof */
1960 if (in_left == 0)
1961 in_eof = 1;
1962 }
1963
1964 /* update the total and return the available bytes */
1965 in_tot += in_left;
1966 return in_left;
1967}
1968
1969/* initialize for reading new input */
1970local void in_init(void)
1971{
1972 in_left = 0;
1973 in_eof = 0;
1974 in_short = 0;
1975 in_tot = 0;
1976#ifndef NOTHREAD
1977 in_which = -1;
1978#endif
1979}
1980
1981/* buffered reading macros for decompression and listing */
1982#define GET() (in_eof || (in_left == 0 && load() == 0) ? EOF : \
1983 (in_left--, *in_next++))
Dees_Troy3bde1232012-09-22 08:10:28 -04001984#define GET2() (tmp2 = GET(), tmp2 + ((unsigned)(GET()) << 8))
Dees_Troy51a0e822012-09-05 15:24:24 -04001985#define GET4() (tmp4 = GET2(), tmp4 + ((unsigned long)(GET2()) << 16))
1986#define SKIP(dist) \
1987 do { \
1988 size_t togo = (dist); \
1989 while (togo > in_left) { \
1990 togo -= in_left; \
1991 if (load() == 0) \
1992 return -1; \
1993 } \
1994 in_left -= togo; \
1995 in_next += togo; \
1996 } while (0)
1997
Dees_Troy3bde1232012-09-22 08:10:28 -04001998/* pull LSB order or MSB order integers from an unsigned char buffer */
1999#define PULL2L(p) ((p)[0] + ((unsigned)((p)[1]) << 8))
2000#define PULL4L(p) (PULL2L(p) + ((unsigned long)(PULL2L((p) + 2)) << 16))
2001#define PULL2M(p) (((unsigned)((p)[0]) << 8) + (p)[1])
2002#define PULL4M(p) (((unsigned long)(PULL2M(p)) << 16) + PULL2M((p) + 2))
2003
Dees_Troy51a0e822012-09-05 15:24:24 -04002004/* convert MS-DOS date and time to a Unix time, assuming current timezone
2005 (you got a better idea?) */
2006local time_t dos2time(unsigned long dos)
2007{
2008 struct tm tm;
2009
2010 if (dos == 0)
2011 return time(NULL);
2012 tm.tm_year = ((int)(dos >> 25) & 0x7f) + 80;
2013 tm.tm_mon = ((int)(dos >> 21) & 0xf) - 1;
2014 tm.tm_mday = (int)(dos >> 16) & 0x1f;
2015 tm.tm_hour = (int)(dos >> 11) & 0x1f;
2016 tm.tm_min = (int)(dos >> 5) & 0x3f;
2017 tm.tm_sec = (int)(dos << 1) & 0x3e;
2018 tm.tm_isdst = -1; /* figure out if DST or not */
2019 return mktime(&tm);
2020}
2021
2022/* convert an unsigned 32-bit integer to signed, even if long > 32 bits */
2023local long tolong(unsigned long val)
2024{
2025 return (long)(val & 0x7fffffffUL) - (long)(val & 0x80000000UL);
2026}
2027
2028#define LOW32 0xffffffffUL
2029
2030/* process zip extra field to extract zip64 lengths and Unix mod time */
2031local int read_extra(unsigned len, int save)
2032{
2033 unsigned id, size, tmp2;
2034 unsigned long tmp4;
2035
2036 /* process extra blocks */
2037 while (len >= 4) {
2038 id = GET2();
2039 size = GET2();
2040 if (in_eof)
2041 return -1;
2042 len -= 4;
2043 if (size > len)
2044 break;
2045 len -= size;
2046 if (id == 0x0001) {
2047 /* Zip64 Extended Information Extra Field */
2048 if (zip_ulen == LOW32 && size >= 8) {
2049 zip_ulen = GET4();
2050 SKIP(4);
2051 size -= 8;
2052 }
2053 if (zip_clen == LOW32 && size >= 8) {
2054 zip_clen = GET4();
2055 SKIP(4);
2056 size -= 8;
2057 }
2058 }
2059 if (save) {
2060 if ((id == 0x000d || id == 0x5855) && size >= 8) {
2061 /* PKWare Unix or Info-ZIP Type 1 Unix block */
2062 SKIP(4);
2063 stamp = tolong(GET4());
2064 size -= 8;
2065 }
2066 if (id == 0x5455 && size >= 5) {
2067 /* Extended Timestamp block */
2068 size--;
2069 if (GET() & 1) {
2070 stamp = tolong(GET4());
2071 size -= 4;
2072 }
2073 }
2074 }
2075 SKIP(size);
2076 }
2077 SKIP(len);
2078 return 0;
2079}
2080
2081/* read a gzip, zip, zlib, or lzw header from ind and extract useful
2082 information, return the method -- or on error return negative: -1 is
2083 immediate EOF, -2 is not a recognized compressed format, -3 is premature EOF
2084 within the header, -4 is unexpected header flag values; a method of 256 is
2085 lzw -- set form to indicate gzip, zlib, or zip */
2086local int get_header(int save)
2087{
2088 unsigned magic; /* magic header */
2089 int method; /* compression method */
2090 int flags; /* header flags */
2091 unsigned fname, extra; /* name and extra field lengths */
2092 unsigned tmp2; /* for macro */
2093 unsigned long tmp4; /* for macro */
2094
2095 /* clear return information */
2096 if (save) {
2097 stamp = 0;
2098 RELEASE(hname);
2099 }
2100
2101 /* see if it's a gzip, zlib, or lzw file */
2102 form = 0;
Dees_Troy3bde1232012-09-22 08:10:28 -04002103 magic1 = GET();
Dees_Troy51a0e822012-09-05 15:24:24 -04002104 if (in_eof)
2105 return -1;
Dees_Troy3bde1232012-09-22 08:10:28 -04002106 magic = magic1 << 8;
Dees_Troy51a0e822012-09-05 15:24:24 -04002107 magic += GET();
2108 if (in_eof)
2109 return -2;
2110 if (magic % 31 == 0) { /* it's zlib */
2111 form = 1;
2112 return (int)((magic >> 8) & 0xf);
2113 }
2114 if (magic == 0x1f9d) /* it's lzw */
2115 return 256;
2116 if (magic == 0x504b) { /* it's zip */
2117 if (GET() != 3 || GET() != 4)
2118 return -3;
2119 SKIP(2);
2120 flags = GET2();
2121 if (in_eof)
2122 return -3;
2123 if (flags & 0xfff0)
2124 return -4;
2125 method = GET2();
2126 if (flags & 1) /* encrypted */
2127 method = 255; /* mark as unknown method */
2128 if (in_eof)
2129 return -3;
2130 if (save)
2131 stamp = dos2time(GET4());
2132 else
2133 SKIP(4);
2134 zip_crc = GET4();
2135 zip_clen = GET4();
2136 zip_ulen = GET4();
2137 fname = GET2();
2138 extra = GET2();
2139 if (save) {
2140 char *next = hname = malloc(fname + 1);
2141 if (hname == NULL)
2142 bail("not enough memory", "");
2143 while (fname > in_left) {
2144 memcpy(next, in_next, in_left);
2145 fname -= in_left;
2146 next += in_left;
2147 if (load() == 0)
2148 return -3;
2149 }
2150 memcpy(next, in_next, fname);
2151 in_left -= fname;
2152 in_next += fname;
2153 next += fname;
2154 *next = 0;
2155 }
2156 else
2157 SKIP(fname);
2158 read_extra(extra, save);
2159 form = 2 + ((flags & 8) >> 3);
2160 return in_eof ? -3 : method;
2161 }
Dees_Troy3bde1232012-09-22 08:10:28 -04002162 if (magic != 0x1f8b) { /* not gzip */
2163 in_left++; /* unget second magic byte */
2164 in_next--;
Dees_Troy51a0e822012-09-05 15:24:24 -04002165 return -2;
Dees_Troy3bde1232012-09-22 08:10:28 -04002166 }
Dees_Troy51a0e822012-09-05 15:24:24 -04002167
2168 /* it's gzip -- get method and flags */
2169 method = GET();
2170 flags = GET();
2171 if (in_eof)
2172 return -1;
2173 if (flags & 0xe0)
2174 return -4;
2175
2176 /* get time stamp */
2177 if (save)
2178 stamp = tolong(GET4());
2179 else
2180 SKIP(4);
2181
2182 /* skip extra field and OS */
2183 SKIP(2);
2184
2185 /* skip extra field, if present */
2186 if (flags & 4) {
2187 extra = GET2();
2188 if (in_eof)
2189 return -3;
2190 SKIP(extra);
2191 }
2192
2193 /* read file name, if present, into allocated memory */
2194 if ((flags & 8) && save) {
2195 unsigned char *end;
2196 size_t copy, have, size = 128;
2197 hname = malloc(size);
2198 if (hname == NULL)
2199 bail("not enough memory", "");
2200 have = 0;
2201 do {
2202 if (in_left == 0 && load() == 0)
2203 return -3;
2204 end = memchr(in_next, 0, in_left);
2205 copy = end == NULL ? in_left : (size_t)(end - in_next) + 1;
2206 if (have + copy > size) {
2207 while (have + copy > (size <<= 1))
2208 ;
2209 hname = realloc(hname, size);
2210 if (hname == NULL)
2211 bail("not enough memory", "");
2212 }
2213 memcpy(hname + have, in_next, copy);
2214 have += copy;
2215 in_left -= copy;
2216 in_next += copy;
2217 } while (end == NULL);
2218 }
2219 else if (flags & 8)
2220 while (GET() != 0)
2221 if (in_eof)
2222 return -3;
2223
2224 /* skip comment */
2225 if (flags & 16)
2226 while (GET() != 0)
2227 if (in_eof)
2228 return -3;
2229
2230 /* skip header crc */
2231 if (flags & 2)
2232 SKIP(2);
2233
2234 /* return compression method */
2235 return method;
2236}
2237
2238/* --- list contents of compressed input (gzip, zlib, or lzw) */
2239
2240/* find standard compressed file suffix, return length of suffix */
2241local size_t compressed_suffix(char *nm)
2242{
2243 size_t len;
2244
2245 len = strlen(nm);
2246 if (len > 4) {
2247 nm += len - 4;
2248 len = 4;
2249 if (strcmp(nm, ".zip") == 0 || strcmp(nm, ".ZIP") == 0 ||
2250 strcmp(nm, ".tgz") == 0)
2251 return 4;
2252 }
2253 if (len > 3) {
2254 nm += len - 3;
2255 len = 3;
2256 if (strcmp(nm, ".gz") == 0 || strcmp(nm, "-gz") == 0 ||
2257 strcmp(nm, ".zz") == 0 || strcmp(nm, "-zz") == 0)
2258 return 3;
2259 }
2260 if (len > 2) {
2261 nm += len - 2;
2262 if (strcmp(nm, ".z") == 0 || strcmp(nm, "-z") == 0 ||
2263 strcmp(nm, "_z") == 0 || strcmp(nm, ".Z") == 0)
2264 return 2;
2265 }
2266 return 0;
2267}
2268
2269/* listing file name lengths for -l and -lv */
2270#define NAMEMAX1 48 /* name display limit at verbosity 1 */
2271#define NAMEMAX2 16 /* name display limit at verbosity 2 */
2272
2273/* print gzip or lzw file information */
2274local void show_info(int method, unsigned long check, off_t len, int cont)
2275{
2276 size_t max; /* maximum name length for current verbosity */
2277 size_t n; /* name length without suffix */
2278 time_t now; /* for getting current year */
2279 char mod[26]; /* modification time in text */
2280 char name[NAMEMAX1+1]; /* header or file name, possibly truncated */
2281
2282 /* create abbreviated name from header file name or actual file name */
2283 max = verbosity > 1 ? NAMEMAX2 : NAMEMAX1;
2284 memset(name, 0, max + 1);
2285 if (cont)
2286 strncpy(name, "<...>", max + 1);
2287 else if (hname == NULL) {
2288 n = strlen(in) - compressed_suffix(in);
2289 strncpy(name, in, n > max + 1 ? max + 1 : n);
Dees_Troy3bde1232012-09-22 08:10:28 -04002290 if (strcmp(in + n, ".tgz") == 0 && n < max + 1)
2291 strncpy(name + n, ".tar", max + 1 - n);
Dees_Troy51a0e822012-09-05 15:24:24 -04002292 }
2293 else
2294 strncpy(name, hname, max + 1);
2295 if (name[max])
2296 strcpy(name + max - 3, "...");
2297
2298 /* convert time stamp to text */
2299 if (stamp) {
2300 strcpy(mod, ctime(&stamp));
2301 now = time(NULL);
2302 if (strcmp(mod + 20, ctime(&now) + 20) != 0)
2303 strcpy(mod + 11, mod + 19);
2304 }
2305 else
2306 strcpy(mod + 4, "------ -----");
2307 mod[16] = 0;
2308
2309 /* if first time, print header */
2310 if (first) {
2311 if (verbosity > 1)
2312 fputs("method check timestamp ", stdout);
2313 if (verbosity > 0)
2314 puts("compressed original reduced name");
2315 first = 0;
2316 }
2317
2318 /* print information */
2319 if (verbosity > 1) {
2320 if (form == 3 && !decode)
2321 printf("zip%3d -------- %s ", method, mod + 4);
2322 else if (form > 1)
2323 printf("zip%3d %08lx %s ", method, check, mod + 4);
2324 else if (form)
2325 printf("zlib%2d %08lx %s ", method, check, mod + 4);
2326 else if (method == 256)
2327 printf("lzw -------- %s ", mod + 4);
2328 else
2329 printf("gzip%2d %08lx %s ", method, check, mod + 4);
2330 }
2331 if (verbosity > 0) {
2332 if ((form == 3 && !decode) ||
2333 (method == 8 && in_tot > (len + (len >> 10) + 12)) ||
2334 (method == 256 && in_tot > len + (len >> 1) + 3))
Dees_Troy3bde1232012-09-22 08:10:28 -04002335#if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3
2336 printf("%10jd %10jd? unk %s\n",
2337 (intmax_t)in_tot, (intmax_t)len, name);
2338 else
2339 printf("%10jd %10jd %6.1f%% %s\n",
2340 (intmax_t)in_tot, (intmax_t)len,
2341 len == 0 ? 0 : 100 * (len - in_tot)/(double)len,
2342 name);
2343#else
2344 printf(sizeof(off_t) == sizeof(long) ?
2345 "%10ld %10ld? unk %s\n" : "%10lld %10lld? unk %s\n",
Dees_Troy51a0e822012-09-05 15:24:24 -04002346 in_tot, len, name);
2347 else
Dees_Troy3bde1232012-09-22 08:10:28 -04002348 printf(sizeof(off_t) == sizeof(long) ?
2349 "%10ld %10ld %6.1f%% %s\n" : "%10lld %10lld %6.1f%% %s\n",
Dees_Troy51a0e822012-09-05 15:24:24 -04002350 in_tot, len,
2351 len == 0 ? 0 : 100 * (len - in_tot)/(double)len,
2352 name);
Dees_Troy3bde1232012-09-22 08:10:28 -04002353#endif
Dees_Troy51a0e822012-09-05 15:24:24 -04002354 }
2355}
2356
2357/* list content information about the gzip file at ind (only works if the gzip
2358 file contains a single gzip stream with no junk at the end, and only works
2359 well if the uncompressed length is less than 4 GB) */
2360local void list_info(void)
2361{
2362 int method; /* get_header() return value */
2363 size_t n; /* available trailer bytes */
2364 off_t at; /* used to calculate compressed length */
2365 unsigned char tail[8]; /* trailer containing check and length */
2366 unsigned long check, len; /* check value and length from trailer */
2367
2368 /* initialize input buffer */
2369 in_init();
2370
2371 /* read header information and position input after header */
2372 method = get_header(1);
2373 if (method < 0) {
2374 RELEASE(hname);
2375 if (method != -1 && verbosity > 1)
Dees_Troy3bde1232012-09-22 08:10:28 -04002376 complain("%s not a compressed file -- skipping", in);
Dees_Troy51a0e822012-09-05 15:24:24 -04002377 return;
2378 }
2379
2380 /* list zip file */
2381 if (form > 1) {
2382 in_tot = zip_clen;
2383 show_info(method, zip_crc, zip_ulen, 0);
2384 return;
2385 }
2386
2387 /* list zlib file */
2388 if (form) {
2389 at = lseek(ind, 0, SEEK_END);
2390 if (at == -1) {
2391 check = 0;
2392 do {
2393 len = in_left < 4 ? in_left : 4;
2394 in_next += in_left - len;
2395 while (len--)
2396 check = (check << 8) + *in_next++;
2397 } while (load() != 0);
2398 check &= LOW32;
2399 }
2400 else {
2401 in_tot = at;
2402 lseek(ind, -4, SEEK_END);
2403 readn(ind, tail, 4);
Dees_Troy3bde1232012-09-22 08:10:28 -04002404 check = PULL4M(tail);
Dees_Troy51a0e822012-09-05 15:24:24 -04002405 }
2406 in_tot -= 6;
2407 show_info(method, check, 0, 0);
2408 return;
2409 }
2410
2411 /* list lzw file */
2412 if (method == 256) {
2413 at = lseek(ind, 0, SEEK_END);
2414 if (at == -1)
2415 while (load() != 0)
2416 ;
2417 else
2418 in_tot = at;
2419 in_tot -= 3;
2420 show_info(method, 0, 0, 0);
2421 return;
2422 }
2423
2424 /* skip to end to get trailer (8 bytes), compute compressed length */
2425 if (in_short) { /* whole thing already read */
2426 if (in_left < 8) {
Dees_Troy3bde1232012-09-22 08:10:28 -04002427 complain("%s not a valid gzip file -- skipping", in);
Dees_Troy51a0e822012-09-05 15:24:24 -04002428 return;
2429 }
2430 in_tot = in_left - 8; /* compressed size */
2431 memcpy(tail, in_next + (in_left - 8), 8);
2432 }
2433 else if ((at = lseek(ind, -8, SEEK_END)) != -1) {
2434 in_tot = at - in_tot + in_left; /* compressed size */
2435 readn(ind, tail, 8); /* get trailer */
2436 }
2437 else { /* can't seek */
2438 at = in_tot - in_left; /* save header size */
2439 do {
2440 n = in_left < 8 ? in_left : 8;
2441 memcpy(tail, in_next + (in_left - n), n);
2442 load();
2443 } while (in_left == BUF); /* read until end */
2444 if (in_left < 8) {
2445 if (n + in_left < 8) {
Dees_Troy3bde1232012-09-22 08:10:28 -04002446 complain("%s not a valid gzip file -- skipping", in);
Dees_Troy51a0e822012-09-05 15:24:24 -04002447 return;
2448 }
2449 if (in_left) {
2450 if (n + in_left > 8)
2451 memcpy(tail, tail + n - (8 - in_left), 8 - in_left);
2452 memcpy(tail + 8 - in_left, in_next, in_left);
2453 }
2454 }
2455 else
2456 memcpy(tail, in_next + (in_left - 8), 8);
2457 in_tot -= at + 8;
2458 }
2459 if (in_tot < 2) {
Dees_Troy3bde1232012-09-22 08:10:28 -04002460 complain("%s not a valid gzip file -- skipping", in);
Dees_Troy51a0e822012-09-05 15:24:24 -04002461 return;
2462 }
2463
2464 /* convert trailer to check and uncompressed length (modulo 2^32) */
Dees_Troy3bde1232012-09-22 08:10:28 -04002465 check = PULL4L(tail);
2466 len = PULL4L(tail + 4);
Dees_Troy51a0e822012-09-05 15:24:24 -04002467
2468 /* list information about contents */
2469 show_info(method, check, len, 0);
2470 RELEASE(hname);
2471}
2472
Dees_Troy3bde1232012-09-22 08:10:28 -04002473/* --- copy input to output (when acting like cat) --- */
2474
2475local void cat(void)
2476{
2477 /* write first magic byte (if we're here, there's at least one byte) */
2478 writen(outd, &magic1, 1);
2479 out_tot = 1;
2480
2481 /* copy the remainder of the input to the output (if there were any more
2482 bytes of input, then in_left is non-zero and in_next is pointing to the
2483 second magic byte) */
2484 while (in_left) {
2485 writen(outd, in_next, in_left);
2486 out_tot += in_left;
2487 in_left = 0;
2488 load();