Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1 | /* pigz.c -- parallel implementation of gzip |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2 | * Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 Mark Adler |
| 3 | * Version 2.2.5 28 Jul 2012 Mark Adler |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 4 | */ |
| 5 | |
| 6 | /* |
| 7 | This software is provided 'as-is', without any express or implied |
| 8 | warranty. In no event will the author be held liable for any damages |
| 9 | arising from the use of this software. |
| 10 | |
| 11 | Permission is granted to anyone to use this software for any purpose, |
| 12 | including commercial applications, and to alter it and redistribute it |
| 13 | freely, subject to the following restrictions: |
| 14 | |
| 15 | 1. The origin of this software must not be misrepresented; you must not |
| 16 | claim that you wrote the original software. If you use this software |
| 17 | in a product, an acknowledgment in the product documentation would be |
| 18 | appreciated but is not required. |
| 19 | 2. Altered source versions must be plainly marked as such, and must not be |
| 20 | misrepresented as being the original software. |
| 21 | 3. This notice may not be removed or altered from any source distribution. |
| 22 | |
| 23 | Mark Adler |
| 24 | madler@alumni.caltech.edu |
| 25 | |
| 26 | Mark accepts donations for providing this software. Donations are not |
| 27 | required or expected. Any amount that you feel is appropriate would be |
| 28 | appreciated. You can use this link: |
| 29 | |
| 30 | https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=536055 |
| 31 | |
| 32 | */ |
| 33 | |
| 34 | /* Version history: |
| 35 | 1.0 17 Jan 2007 First version, pipe only |
| 36 | 1.1 28 Jan 2007 Avoid void * arithmetic (some compilers don't get that) |
| 37 | Add note about requiring zlib 1.2.3 |
| 38 | Allow compression level 0 (no compression) |
| 39 | Completely rewrite parallelism -- add a write thread |
| 40 | Use deflateSetDictionary() to make use of history |
| 41 | Tune argument defaults to best performance on four cores |
| 42 | 1.2.1 1 Feb 2007 Add long command line options, add all gzip options |
| 43 | Add debugging options |
| 44 | 1.2.2 19 Feb 2007 Add list (--list) function |
| 45 | Process file names on command line, write .gz output |
| 46 | Write name and time in gzip header, set output file time |
| 47 | Implement all command line options except --recursive |
| 48 | Add --keep option to prevent deleting input files |
| 49 | Add thread tracing information with -vv used |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 50 | Copy crc32_combine() from zlib (shared libraries issue) |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 51 | 1.3 25 Feb 2007 Implement --recursive |
| 52 | Expand help to show all options |
| 53 | Show help if no arguments or output piping are provided |
| 54 | Process options in GZIP environment variable |
| 55 | Add progress indicator to write thread if --verbose |
| 56 | 1.4 4 Mar 2007 Add --independent to facilitate damaged file recovery |
| 57 | Reallocate jobs for new --blocksize or --processes |
| 58 | Do not delete original if writing to stdout |
| 59 | Allow --processes 1, which does no threading |
| 60 | Add NOTHREAD define to compile without threads |
| 61 | Incorporate license text from zlib in source code |
| 62 | 1.5 25 Mar 2007 Reinitialize jobs for new compression level |
| 63 | Copy attributes and owner from input file to output file |
| 64 | Add decompression and testing |
| 65 | Add -lt (or -ltv) to show all entries and proper lengths |
| 66 | Add decompression, testing, listing of LZW (.Z) files |
| 67 | Only generate and show trace log if DEBUG defined |
| 68 | Take "-" argument to mean read file from stdin |
| 69 | 1.6 30 Mar 2007 Add zlib stream compression (--zlib), and decompression |
| 70 | 1.7 29 Apr 2007 Decompress first entry of a zip file (if deflated) |
| 71 | Avoid empty deflate blocks at end of deflate stream |
| 72 | Show zlib check value (Adler-32) when listing |
| 73 | Don't complain when decompressing empty file |
| 74 | Warn about trailing junk for gzip and zlib streams |
| 75 | Make listings consistent, ignore gzip extra flags |
| 76 | Add zip stream compression (--zip) |
| 77 | 1.8 13 May 2007 Document --zip option in help output |
| 78 | 2.0 19 Oct 2008 Complete rewrite of thread usage and synchronization |
| 79 | Use polling threads and a pool of memory buffers |
| 80 | Remove direct pthread library use, hide in yarn.c |
| 81 | 2.0.1 20 Oct 2008 Check version of zlib at compile time, need >= 1.2.3 |
| 82 | 2.1 24 Oct 2008 Decompress with read, write, inflate, and check threads |
| 83 | Remove spurious use of ctime_r(), ctime() more portable |
| 84 | Change application of job->calc lock to be a semaphore |
| 85 | Detect size of off_t at run time to select %lu vs. %llu |
| 86 | #define large file support macro even if not __linux__ |
| 87 | Remove _LARGEFILE64_SOURCE, _FILE_OFFSET_BITS is enough |
| 88 | Detect file-too-large error and report, blame build |
| 89 | Replace check combination routines with those from zlib |
| 90 | 2.1.1 28 Oct 2008 Fix a leak for files with an integer number of blocks |
| 91 | Update for yarn 1.1 (yarn_prefix and yarn_abort) |
| 92 | 2.1.2 30 Oct 2008 Work around use of beta zlib in production systems |
| 93 | 2.1.3 8 Nov 2008 Don't use zlib combination routines, put back in pigz |
| 94 | 2.1.4 9 Nov 2008 Fix bug when decompressing very short files |
| 95 | 2.1.5 20 Jul 2009 Added 2008, 2009 to --license statement |
| 96 | Allow numeric parameter immediately after -p or -b |
| 97 | Enforce parameter after -p, -b, -s, before other options |
| 98 | Enforce numeric parameters to have only numeric digits |
| 99 | Try to determine the number of processors for -p default |
| 100 | Fix --suffix short option to be -S to match gzip [Bloch] |
| 101 | Decompress if executable named "unpigz" [Amundsen] |
| 102 | Add a little bit of testing to Makefile |
| 103 | 2.1.6 17 Jan 2010 Added pigz.spec to distribution for RPM systems [Brown] |
| 104 | Avoid some compiler warnings |
| 105 | Process symbolic links if piping to stdout [Hoffstätte] |
| 106 | Decompress if executable named "gunzip" [Hoffstätte] |
| 107 | Allow ".tgz" suffix [Chernookiy] |
| 108 | Fix adler32 comparison on .zz files |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 109 | 2.1.7 17 Dec 2011 Avoid unused parameter warning in reenter() |
| 110 | Don't assume 2's complement ints in compress_thread() |
| 111 | Replicate gzip -cdf cat-like behavior |
| 112 | Replicate gzip -- option to suppress option decoding |
| 113 | Test output from make test instead of showing it |
| 114 | Updated pigz.spec to install unpigz, pigz.1 [Obermaier] |
| 115 | Add PIGZ environment variable [Mueller] |
| 116 | Replicate gzip suffix search when decoding or listing |
| 117 | Fix bug in load() to set in_left to zero on end of file |
| 118 | Do not check suffix when input file won't be modified |
| 119 | Decompress to stdout if name is "*cat" [Hayasaka] |
| 120 | Write data descriptor signature to be like Info-ZIP |
| 121 | Update and sort options list in help |
| 122 | Use CC variable for compiler in Makefile |
| 123 | Exit with code 2 if a warning has been issued |
| 124 | Fix thread synchronization problem when tracing |
| 125 | Change macro name MAX to MAX2 to avoid library conflicts |
| 126 | Determine number of processors on HP-UX [Lloyd] |
| 127 | 2.2 31 Dec 2011 Check for expansion bound busting (e.g. modified zlib) |
| 128 | Make the "threads" list head global variable volatile |
| 129 | Fix construction and printing of 32-bit check values |
| 130 | Add --rsyncable functionality |
| 131 | 2.2.1 1 Jan 2012 Fix bug in --rsyncable buffer management |
| 132 | 2.2.2 1 Jan 2012 Fix another bug in --rsyncable buffer management |
| 133 | 2.2.3 15 Jan 2012 Remove volatile in yarn.c |
| 134 | Reduce the number of input buffers |
| 135 | Change initial rsyncable hash to comparison value |
| 136 | Improve the efficiency of arriving at a byte boundary |
| 137 | Add thread portability #defines from yarn.c |
| 138 | Have rsyncable compression be independent of threading |
| 139 | Fix bug where constructed dictionaries not being used |
| 140 | 2.2.4 11 Mar 2012 Avoid some return value warnings |
| 141 | Improve the portability of printing the off_t type |
| 142 | Check for existence of compress binary before using |
| 143 | Update zlib version checking to 1.2.6 for new functions |
| 144 | Fix bug in zip (-K) output |
| 145 | Fix license in pigz.spec |
| 146 | Remove thread portability #defines in pigz.c |
| 147 | 2.2.5 28 Jul 2012 Avoid race condition in free_pool() |
| 148 | Change suffix to .tar when decompressing or listing .tgz |
| 149 | Print name of executable in error messages |
| 150 | Show help properly when the name is unpigz or gunzip |
| 151 | Fix permissions security problem before output is closed |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 152 | */ |
| 153 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 154 | #define VERSION "pigz 2.2.5\n" |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 155 | |
| 156 | /* To-do: |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 157 | - make source portable for Windows, VMS, etc. (see gzip source code) |
| 158 | - make build portable (currently good for Unixish) |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 159 | */ |
| 160 | |
| 161 | /* |
| 162 | pigz compresses using threads to make use of multiple processors and cores. |
| 163 | The input is broken up into 128 KB chunks with each compressed in parallel. |
| 164 | The individual check value for each chunk is also calculated in parallel. |
| 165 | The compressed data is written in order to the output, and a combined check |
| 166 | value is calculated from the individual check values. |
| 167 | |
| 168 | The compressed data format generated is in the gzip, zlib, or single-entry |
| 169 | zip format using the deflate compression method. The compression produces |
| 170 | partial raw deflate streams which are concatenated by a single write thread |
| 171 | and wrapped with the appropriate header and trailer, where the trailer |
| 172 | contains the combined check value. |
| 173 | |
| 174 | Each partial raw deflate stream is terminated by an empty stored block |
| 175 | (using the Z_SYNC_FLUSH option of zlib), in order to end that partial bit |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 176 | stream at a byte boundary, unless that partial stream happens to already end |
| 177 | at a byte boundary (the latter requires zlib 1.2.6 or later). Ending on a |
| 178 | byte boundary allows the partial streams to be concatenated simply as |
| 179 | sequences of bytes. This adds a very small four to five byte overhead |
| 180 | (average 3.75 bytes) to the output for each input chunk. |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 181 | |
| 182 | The default input block size is 128K, but can be changed with the -b option. |
| 183 | The number of compress threads is set by default to 8, which can be changed |
| 184 | using the -p option. Specifying -p 1 avoids the use of threads entirely. |
| 185 | pigz will try to determine the number of processors in the machine, in which |
| 186 | case if that number is two or greater, pigz will use that as the default for |
| 187 | -p instead of 8. |
| 188 | |
| 189 | The input blocks, while compressed independently, have the last 32K of the |
| 190 | previous block loaded as a preset dictionary to preserve the compression |
| 191 | effectiveness of deflating in a single thread. This can be turned off using |
| 192 | the --independent or -i option, so that the blocks can be decompressed |
| 193 | independently for partial error recovery or for random access. |
| 194 | |
| 195 | Decompression can't be parallelized, at least not without specially prepared |
| 196 | deflate streams for that purpose. As a result, pigz uses a single thread |
| 197 | (the main thread) for decompression, but will create three other threads for |
| 198 | reading, writing, and check calculation, which can speed up decompression |
| 199 | under some circumstances. Parallel decompression can be turned off by |
| 200 | specifying one process (-dp 1 or -tp 1). |
| 201 | |
| 202 | pigz requires zlib 1.2.1 or later to allow setting the dictionary when doing |
| 203 | raw deflate. Since zlib 1.2.3 corrects security vulnerabilities in zlib |
| 204 | version 1.2.1 and 1.2.2, conditionals check for zlib 1.2.3 or later during |
| 205 | the compilation of pigz.c. zlib 1.2.4 includes some improvements to |
| 206 | Z_FULL_FLUSH and deflateSetDictionary() that permit identical output for |
| 207 | pigz with and without threads, which is not possible with zlib 1.2.3. This |
| 208 | may be important for uses of pigz -R where small changes in the contents |
| 209 | should result in small changes in the archive for rsync. Note that due to |
| 210 | the details of how the lower levels of compression result in greater speed, |
| 211 | compression level 3 and below does not permit identical pigz output with |
| 212 | and without threads. |
| 213 | |
| 214 | pigz uses the POSIX pthread library for thread control and communication, |
| 215 | through the yarn.h interface to yarn.c. yarn.c can be replaced with |
| 216 | equivalent implementations using other thread libraries. pigz can be |
| 217 | compiled with NOTHREAD #defined to not use threads at all (in which case |
| 218 | pigz will not be able to live up to the "parallel" in its name). |
| 219 | */ |
| 220 | |
| 221 | /* |
| 222 | Details of parallel compression implementation: |
| 223 | |
| 224 | When doing parallel compression, pigz uses the main thread to read the input |
| 225 | in 'size' sized chunks (see -b), and puts those in a compression job list, |
| 226 | each with a sequence number to keep track of the ordering. If it is not the |
| 227 | first chunk, then that job also points to the previous input buffer, from |
| 228 | which the last 32K will be used as a dictionary (unless -i is specified). |
| 229 | This sets a lower limit of 32K on 'size'. |
| 230 | |
| 231 | pigz launches up to 'procs' compression threads (see -p). Each compression |
| 232 | thread continues to look for jobs in the compression list and perform those |
| 233 | jobs until instructed to return. When a job is pulled, the dictionary, if |
| 234 | provided, will be loaded into the deflate engine and then that input buffer |
| 235 | is dropped for reuse. Then the input data is compressed into an output |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 236 | buffer that grows in size if necessary to hold the compressed data. The job |
| 237 | is then put into the write job list, sorted by the sequence number. The |
| 238 | compress thread however continues to calculate the check value on the input |
| 239 | data, either a CRC-32 or Adler-32, possibly in parallel with the write |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 240 | thread writing the output data. Once that's done, the compress thread drops |
| 241 | the input buffer and also releases the lock on the check value so that the |
| 242 | write thread can combine it with the previous check values. The compress |
| 243 | thread has then completed that job, and goes to look for another. |
| 244 | |
| 245 | All of the compress threads are left running and waiting even after the last |
| 246 | chunk is processed, so that they can support the next input to be compressed |
| 247 | (more than one input file on the command line). Once pigz is done, it will |
| 248 | call all the compress threads home (that'll do pig, that'll do). |
| 249 | |
| 250 | Before starting to read the input, the main thread launches the write thread |
| 251 | so that it is ready pick up jobs immediately. The compress thread puts the |
| 252 | write jobs in the list in sequence sorted order, so that the first job in |
| 253 | the list is always has the lowest sequence number. The write thread waits |
| 254 | for the next write job in sequence, and then gets that job. The job still |
| 255 | holds its input buffer, from which the write thread gets the input buffer |
| 256 | length for use in check value combination. Then the write thread drops that |
| 257 | input buffer to allow its reuse. Holding on to the input buffer until the |
| 258 | write thread starts also has the benefit that the read and compress threads |
| 259 | can't get way ahead of the write thread and build up a large backlog of |
| 260 | unwritten compressed data. The write thread will write the compressed data, |
| 261 | drop the output buffer, and then wait for the check value to be unlocked |
| 262 | by the compress thread. Then the write thread combines the check value for |
| 263 | this chunk with the total check value for eventual use in the trailer. If |
| 264 | this is not the last chunk, the write thread then goes back to look for the |
| 265 | next output chunk in sequence. After the last chunk, the write thread |
| 266 | returns and joins the main thread. Unlike the compress threads, a new write |
| 267 | thread is launched for each input stream. The write thread writes the |
| 268 | appropriate header and trailer around the compressed data. |
| 269 | |
| 270 | The input and output buffers are reused through their collection in pools. |
| 271 | Each buffer has a use count, which when decremented to zero returns the |
| 272 | buffer to the respective pool. Each input buffer has up to three parallel |
| 273 | uses: as the input for compression, as the data for the check value |
| 274 | calculation, and as a dictionary for compression. Each output buffer has |
| 275 | only one use, which is as the output of compression followed serially as |
| 276 | data to be written. The input pool is limited in the number of buffers, so |
| 277 | that reading does not get way ahead of compression and eat up memory with |
| 278 | more input than can be used. The limit is approximately two times the |
| 279 | number of compression threads. In the case that reading is fast as compared |
| 280 | to compression, that number allows a second set of buffers to be read while |
| 281 | the first set of compressions are being performed. The number of output |
| 282 | buffers is not directly limited, but is indirectly limited by the release of |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 283 | input buffers to about the same number. |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 284 | */ |
| 285 | |
| 286 | /* use large file functions if available */ |
| 287 | #define _FILE_OFFSET_BITS 64 |
| 288 | |
| 289 | /* included headers and what is expected from each */ |
| 290 | #include <stdio.h> /* fflush(), fprintf(), fputs(), getchar(), putc(), */ |
| 291 | /* puts(), printf(), vasprintf(), stderr, EOF, NULL, |
| 292 | SEEK_END, size_t, off_t */ |
| 293 | #include <stdlib.h> /* exit(), malloc(), free(), realloc(), atol(), */ |
| 294 | /* atoi(), getenv() */ |
| 295 | #include <stdarg.h> /* va_start(), va_end(), va_list */ |
| 296 | #include <string.h> /* memset(), memchr(), memcpy(), strcmp(), strcpy() */ |
| 297 | /* strncpy(), strlen(), strcat(), strrchr() */ |
| 298 | #include <errno.h> /* errno, EEXIST */ |
| 299 | #include <assert.h> /* assert() */ |
| 300 | #include <time.h> /* ctime(), time(), time_t, mktime() */ |
| 301 | #include <signal.h> /* signal(), SIGINT */ |
| 302 | #include <sys/types.h> /* ssize_t */ |
| 303 | #include <sys/stat.h> /* chmod(), stat(), fstat(), lstat(), struct stat, */ |
| 304 | /* S_IFDIR, S_IFLNK, S_IFMT, S_IFREG */ |
| 305 | #include <sys/time.h> /* utimes(), gettimeofday(), struct timeval */ |
| 306 | #include <unistd.h> /* unlink(), _exit(), read(), write(), close(), */ |
| 307 | /* lseek(), isatty(), chown() */ |
| 308 | #include <fcntl.h> /* open(), O_CREAT, O_EXCL, O_RDONLY, O_TRUNC, */ |
| 309 | /* O_WRONLY */ |
| 310 | #include <dirent.h> /* opendir(), readdir(), closedir(), DIR, */ |
| 311 | /* struct dirent */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 312 | #include <limits.h> /* PATH_MAX, UINT_MAX */ |
| 313 | #if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3 |
| 314 | # include <inttypes.h> /* intmax_t */ |
| 315 | #endif |
| 316 | |
| 317 | #ifdef __hpux |
| 318 | # include <sys/param.h> |
| 319 | # include <sys/pstat.h> |
| 320 | #endif |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 321 | |
| 322 | #include "zlib.h" /* deflateInit2(), deflateReset(), deflate(), */ |
| 323 | /* deflateEnd(), deflateSetDictionary(), crc32(), |
| 324 | inflateBackInit(), inflateBack(), inflateBackEnd(), |
| 325 | Z_DEFAULT_COMPRESSION, Z_DEFAULT_STRATEGY, |
| 326 | Z_DEFLATED, Z_NO_FLUSH, Z_NULL, Z_OK, |
| 327 | Z_SYNC_FLUSH, z_stream */ |
| 328 | #if !defined(ZLIB_VERNUM) || ZLIB_VERNUM < 0x1230 |
| 329 | # error Need zlib version 1.2.3 or later |
| 330 | #endif |
| 331 | |
| 332 | #ifndef NOTHREAD |
| 333 | # include "yarn.h" /* thread, launch(), join(), join_all(), */ |
| 334 | /* lock, new_lock(), possess(), twist(), wait_for(), |
| 335 | release(), peek_lock(), free_lock(), yarn_name */ |
| 336 | #endif |
| 337 | |
| 338 | /* for local functions and globals */ |
| 339 | #define local static |
| 340 | |
| 341 | /* prevent end-of-line conversions on MSDOSish operating systems */ |
| 342 | #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) |
| 343 | # include <io.h> /* setmode(), O_BINARY */ |
| 344 | # define SET_BINARY_MODE(fd) setmode(fd, O_BINARY) |
| 345 | #else |
| 346 | # define SET_BINARY_MODE(fd) |
| 347 | #endif |
| 348 | |
| 349 | /* release an allocated pointer, if allocated, and mark as unallocated */ |
| 350 | #define RELEASE(ptr) \ |
| 351 | do { \ |
| 352 | if ((ptr) != NULL) { \ |
| 353 | free(ptr); \ |
| 354 | ptr = NULL; \ |
| 355 | } \ |
| 356 | } while (0) |
| 357 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 358 | /* sliding dictionary size for deflate */ |
| 359 | #define DICT 32768U |
| 360 | |
| 361 | /* largest power of 2 that fits in an unsigned int -- used to limit requests |
| 362 | to zlib functions that use unsigned int lengths */ |
| 363 | #define MAXP2 (UINT_MAX - (UINT_MAX >> 1)) |
| 364 | |
| 365 | /* rsyncable constants -- RSYNCBITS is the number of bits in the mask for |
| 366 | comparison. For random input data, there will be a hit on average every |
| 367 | 1<<RSYNCBITS bytes. So for an RSYNCBITS of 12, there will be an average of |
| 368 | one hit every 4096 bytes, resulting in a mean block size of 4096. RSYNCMASK |
| 369 | is the resulting bit mask. RSYNCHIT is what the hash value is compared to |
| 370 | after applying the mask. |
| 371 | |
| 372 | The choice of 12 for RSYNCBITS is consistent with the original rsyncable |
| 373 | patch for gzip which also uses a 12-bit mask. This results in a relatively |
| 374 | small hit to compression, on the order of 1.5% to 3%. A mask of 13 bits can |
| 375 | be used instead if a hit of less than 1% to the compression is desired, at |
| 376 | the expense of more blocks transmitted for rsync updates. (Your mileage may |
| 377 | vary.) |
| 378 | |
| 379 | This implementation of rsyncable uses a different hash algorithm than what |
| 380 | the gzip rsyncable patch uses in order to provide better performance in |
| 381 | several regards. The algorithm is simply to shift the hash value left one |
| 382 | bit and exclusive-or that with the next byte. This is masked to the number |
| 383 | of hash bits (RSYNCMASK) and compared to all ones except for a zero in the |
| 384 | top bit (RSYNCHIT). This rolling hash has a very small window of 19 bytes |
| 385 | (RSYNCBITS+7). The small window provides the benefit of much more rapid |
| 386 | resynchronization after a change, than does the 4096-byte window of the gzip |
| 387 | rsyncable patch. |
| 388 | |
| 389 | The comparison value is chosen to avoid matching any repeated bytes or short |
| 390 | sequences. The gzip rsyncable patch on the other hand uses a sum and zero |
| 391 | for comparison, which results in certain bad behaviors, such as always |
| 392 | matching everywhere in a long sequence of zeros. Such sequences occur |
| 393 | frequently in tar files. |
| 394 | |
| 395 | This hash efficiently discards history older than 19 bytes simply by |
| 396 | shifting that data past the top of the mask -- no history needs to be |
| 397 | retained to undo its impact on the hash value, as is needed for a sum. |
| 398 | |
| 399 | The choice of the comparison value (RSYNCHIT) has the virtue of avoiding |
| 400 | extremely short blocks. The shortest block is five bytes (RSYNCBITS-7) from |
| 401 | hit to hit, and is unlikely. Whereas with the gzip rsyncable algorithm, |
| 402 | blocks of one byte are not only possible, but in fact are the most likely |
| 403 | block size. |
| 404 | |
| 405 | Thanks and acknowledgement to Kevin Day for his experimentation and insights |
| 406 | on rsyncable hash characteristics that led to some of the choices here. |
| 407 | */ |
| 408 | #define RSYNCBITS 12 |
| 409 | #define RSYNCMASK ((1U << RSYNCBITS) - 1) |
| 410 | #define RSYNCHIT (RSYNCMASK >> 1) |
| 411 | |
| 412 | /* initial pool counts and sizes -- INBUFS is the limit on the number of input |
| 413 | spaces as a function of the number of processors (used to throttle the |
| 414 | creation of compression jobs), OUTPOOL is the initial size of the output |
| 415 | data buffer, chosen to make resizing of the buffer very unlikely */ |
| 416 | #define INBUFS(p) (((p)<<1)+3) |
| 417 | #define OUTPOOL(s) ((s)+((s)>>4)) |
| 418 | |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 419 | /* globals (modified by main thread only when it's the only thread) */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 420 | local char *prog; /* name by which pigz was invoked */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 421 | local int ind; /* input file descriptor */ |
| 422 | local int outd; /* output file descriptor */ |
| 423 | local char in[PATH_MAX+1]; /* input file name (accommodate recursion) */ |
| 424 | local char *out = NULL; /* output file name (allocated if not NULL) */ |
| 425 | local int verbosity; /* 0 = quiet, 1 = normal, 2 = verbose, 3 = trace */ |
| 426 | local int headis; /* 1 to store name, 2 to store date, 3 both */ |
| 427 | local int pipeout; /* write output to stdout even if file */ |
| 428 | local int keep; /* true to prevent deletion of input file */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 429 | local int force; /* true to overwrite, compress links, cat */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 430 | local int form; /* gzip = 0, zlib = 1, zip = 2 or 3 */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 431 | local unsigned char magic1; /* first byte of possible header when decoding */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 432 | local int recurse; /* true to dive down into directory structure */ |
| 433 | local char *sufx; /* suffix to use (".gz" or user supplied) */ |
| 434 | local char *name; /* name for gzip header */ |
| 435 | local time_t mtime; /* time stamp from input file for gzip header */ |
| 436 | local int list; /* true to list files instead of compress */ |
| 437 | local int first = 1; /* true if we need to print listing header */ |
| 438 | local int decode; /* 0 to compress, 1 to decompress, 2 to test */ |
| 439 | local int level; /* compression level */ |
| 440 | local int rsync; /* true for rsync blocking */ |
| 441 | local int procs; /* maximum number of compression threads (>= 1) */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 442 | local int setdict; /* true to initialize dictionary in each thread */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 443 | local size_t size; /* uncompressed input size per thread (>= 32K) */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 444 | local int warned = 0; /* true if a warning has been given */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 445 | |
| 446 | /* saved gzip/zip header data for decompression, testing, and listing */ |
| 447 | local time_t stamp; /* time stamp from gzip header */ |
| 448 | local char *hname = NULL; /* name from header (allocated) */ |
| 449 | local unsigned long zip_crc; /* local header crc */ |
| 450 | local unsigned long zip_clen; /* local header compressed length */ |
| 451 | local unsigned long zip_ulen; /* local header uncompressed length */ |
| 452 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 453 | /* display a complaint with the program name on stderr */ |
| 454 | local int complain(char *fmt, ...) |
| 455 | { |
| 456 | va_list ap; |
| 457 | |
| 458 | if (verbosity > 0) { |
| 459 | fprintf(stderr, "%s: ", prog); |
| 460 | va_start(ap, fmt); |
| 461 | vfprintf(stderr, fmt, ap); |
| 462 | va_end(ap); |
| 463 | putc('\n', stderr); |
| 464 | fflush(stderr); |
| 465 | warned = 1; |
| 466 | } |
| 467 | return 0; |
| 468 | } |
| 469 | |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 470 | /* exit with error, delete output file if in the middle of writing it */ |
| 471 | local int bail(char *why, char *what) |
| 472 | { |
| 473 | if (outd != -1 && out != NULL) |
| 474 | unlink(out); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 475 | complain("abort: %s%s", why, what); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 476 | exit(1); |
| 477 | return 0; |
| 478 | } |
| 479 | |
| 480 | #ifdef DEBUG |
| 481 | |
| 482 | /* starting time of day for tracing */ |
| 483 | local struct timeval start; |
| 484 | |
| 485 | /* trace log */ |
| 486 | local struct log { |
| 487 | struct timeval when; /* time of entry */ |
| 488 | char *msg; /* message */ |
| 489 | struct log *next; /* next entry */ |
| 490 | } *log_head, **log_tail = NULL; |
| 491 | #ifndef NOTHREAD |
| 492 | local lock *log_lock = NULL; |
| 493 | #endif |
| 494 | |
| 495 | /* maximum log entry length */ |
| 496 | #define MAXMSG 256 |
| 497 | |
| 498 | /* set up log (call from main thread before other threads launched) */ |
| 499 | local void log_init(void) |
| 500 | { |
| 501 | if (log_tail == NULL) { |
| 502 | #ifndef NOTHREAD |
| 503 | log_lock = new_lock(0); |
| 504 | #endif |
| 505 | log_head = NULL; |
| 506 | log_tail = &log_head; |
| 507 | } |
| 508 | } |
| 509 | |
| 510 | /* add entry to trace log */ |
| 511 | local void log_add(char *fmt, ...) |
| 512 | { |
| 513 | struct timeval now; |
| 514 | struct log *me; |
| 515 | va_list ap; |
| 516 | char msg[MAXMSG]; |
| 517 | |
| 518 | gettimeofday(&now, NULL); |
| 519 | me = malloc(sizeof(struct log)); |
| 520 | if (me == NULL) |
| 521 | bail("not enough memory", ""); |
| 522 | me->when = now; |
| 523 | va_start(ap, fmt); |
| 524 | vsnprintf(msg, MAXMSG, fmt, ap); |
| 525 | va_end(ap); |
| 526 | me->msg = malloc(strlen(msg) + 1); |
| 527 | if (me->msg == NULL) { |
| 528 | free(me); |
| 529 | bail("not enough memory", ""); |
| 530 | } |
| 531 | strcpy(me->msg, msg); |
| 532 | me->next = NULL; |
| 533 | #ifndef NOTHREAD |
| 534 | assert(log_lock != NULL); |
| 535 | possess(log_lock); |
| 536 | #endif |
| 537 | *log_tail = me; |
| 538 | log_tail = &(me->next); |
| 539 | #ifndef NOTHREAD |
| 540 | twist(log_lock, BY, +1); |
| 541 | #endif |
| 542 | } |
| 543 | |
| 544 | /* pull entry from trace log and print it, return false if empty */ |
| 545 | local int log_show(void) |
| 546 | { |
| 547 | struct log *me; |
| 548 | struct timeval diff; |
| 549 | |
| 550 | if (log_tail == NULL) |
| 551 | return 0; |
| 552 | #ifndef NOTHREAD |
| 553 | possess(log_lock); |
| 554 | #endif |
| 555 | me = log_head; |
| 556 | if (me == NULL) { |
| 557 | #ifndef NOTHREAD |
| 558 | release(log_lock); |
| 559 | #endif |
| 560 | return 0; |
| 561 | } |
| 562 | log_head = me->next; |
| 563 | if (me->next == NULL) |
| 564 | log_tail = &log_head; |
| 565 | #ifndef NOTHREAD |
| 566 | twist(log_lock, BY, -1); |
| 567 | #endif |
| 568 | diff.tv_usec = me->when.tv_usec - start.tv_usec; |
| 569 | diff.tv_sec = me->when.tv_sec - start.tv_sec; |
| 570 | if (diff.tv_usec < 0) { |
| 571 | diff.tv_usec += 1000000L; |
| 572 | diff.tv_sec--; |
| 573 | } |
| 574 | fprintf(stderr, "trace %ld.%06ld %s\n", |
| 575 | (long)diff.tv_sec, (long)diff.tv_usec, me->msg); |
| 576 | fflush(stderr); |
| 577 | free(me->msg); |
| 578 | free(me); |
| 579 | return 1; |
| 580 | } |
| 581 | |
| 582 | /* release log resources (need to do log_init() to use again) */ |
| 583 | local void log_free(void) |
| 584 | { |
| 585 | struct log *me; |
| 586 | |
| 587 | if (log_tail != NULL) { |
| 588 | #ifndef NOTHREAD |
| 589 | possess(log_lock); |
| 590 | #endif |
| 591 | while ((me = log_head) != NULL) { |
| 592 | log_head = me->next; |
| 593 | free(me->msg); |
| 594 | free(me); |
| 595 | } |
| 596 | #ifndef NOTHREAD |
| 597 | twist(log_lock, TO, 0); |
| 598 | free_lock(log_lock); |
| 599 | log_lock = NULL; |
| 600 | #endif |
| 601 | log_tail = NULL; |
| 602 | } |
| 603 | } |
| 604 | |
| 605 | /* show entries until no more, free log */ |
| 606 | local void log_dump(void) |
| 607 | { |
| 608 | if (log_tail == NULL) |
| 609 | return; |
| 610 | while (log_show()) |
| 611 | ; |
| 612 | log_free(); |
| 613 | } |
| 614 | |
| 615 | /* debugging macro */ |
| 616 | #define Trace(x) \ |
| 617 | do { \ |
| 618 | if (verbosity > 2) { \ |
| 619 | log_add x; \ |
| 620 | } \ |
| 621 | } while (0) |
| 622 | |
| 623 | #else /* !DEBUG */ |
| 624 | |
| 625 | #define log_dump() |
| 626 | #define Trace(x) |
| 627 | |
| 628 | #endif |
| 629 | |
| 630 | /* read up to len bytes into buf, repeating read() calls as needed */ |
| 631 | local size_t readn(int desc, unsigned char *buf, size_t len) |
| 632 | { |
| 633 | ssize_t ret; |
| 634 | size_t got; |
| 635 | |
| 636 | got = 0; |
| 637 | while (len) { |
| 638 | ret = read(desc, buf, len); |
| 639 | if (ret < 0) |
| 640 | bail("read error on ", in); |
| 641 | if (ret == 0) |
| 642 | break; |
| 643 | buf += ret; |
| 644 | len -= ret; |
| 645 | got += ret; |
| 646 | } |
| 647 | return got; |
| 648 | } |
| 649 | |
| 650 | /* write len bytes, repeating write() calls as needed */ |
| 651 | local void writen(int desc, unsigned char *buf, size_t len) |
| 652 | { |
| 653 | ssize_t ret; |
| 654 | |
| 655 | while (len) { |
| 656 | ret = write(desc, buf, len); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 657 | if (ret < 1) { |
| 658 | complain("write error code %d", errno); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 659 | bail("write error on ", out); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 660 | } |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 661 | buf += ret; |
| 662 | len -= ret; |
| 663 | } |
| 664 | } |
| 665 | |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 666 | /* convert Unix time to MS-DOS date and time, assuming current timezone |
| 667 | (you got a better idea?) */ |
| 668 | local unsigned long time2dos(time_t t) |
| 669 | { |
| 670 | struct tm *tm; |
| 671 | unsigned long dos; |
| 672 | |
| 673 | if (t == 0) |
| 674 | t = time(NULL); |
| 675 | tm = localtime(&t); |
| 676 | if (tm->tm_year < 80 || tm->tm_year > 207) |
| 677 | return 0; |
| 678 | dos = (tm->tm_year - 80) << 25; |
| 679 | dos += (tm->tm_mon + 1) << 21; |
| 680 | dos += tm->tm_mday << 16; |
| 681 | dos += tm->tm_hour << 11; |
| 682 | dos += tm->tm_min << 5; |
| 683 | dos += (tm->tm_sec + 1) >> 1; /* round to double-seconds */ |
| 684 | return dos; |
| 685 | } |
| 686 | |
| 687 | /* put a 4-byte integer into a byte array in LSB order or MSB order */ |
| 688 | #define PUT2L(a,b) (*(a)=(b)&0xff,(a)[1]=(b)>>8) |
| 689 | #define PUT4L(a,b) (PUT2L(a,(b)&0xffff),PUT2L((a)+2,(b)>>16)) |
| 690 | #define PUT4M(a,b) (*(a)=(b)>>24,(a)[1]=(b)>>16,(a)[2]=(b)>>8,(a)[3]=(b)) |
| 691 | |
| 692 | /* write a gzip, zlib, or zip header using the information in the globals */ |
| 693 | local unsigned long put_header(void) |
| 694 | { |
| 695 | unsigned long len; |
| 696 | unsigned char head[30]; |
| 697 | |
| 698 | if (form > 1) { /* zip */ |
| 699 | /* write local header */ |
| 700 | PUT4L(head, 0x04034b50UL); /* local header signature */ |
| 701 | PUT2L(head + 4, 20); /* version needed to extract (2.0) */ |
| 702 | PUT2L(head + 6, 8); /* flags: data descriptor follows data */ |
| 703 | PUT2L(head + 8, 8); /* deflate */ |
| 704 | PUT4L(head + 10, time2dos(mtime)); |
| 705 | PUT4L(head + 14, 0); /* crc (not here) */ |
| 706 | PUT4L(head + 18, 0); /* compressed length (not here) */ |
| 707 | PUT4L(head + 22, 0); /* uncompressed length (not here) */ |
| 708 | PUT2L(head + 26, name == NULL ? 1 : strlen(name)); /* name length */ |
| 709 | PUT2L(head + 28, 9); /* length of extra field (see below) */ |
| 710 | writen(outd, head, 30); /* write local header */ |
| 711 | len = 30; |
| 712 | |
| 713 | /* write file name (use "-" for stdin) */ |
| 714 | if (name == NULL) |
| 715 | writen(outd, (unsigned char *)"-", 1); |
| 716 | else |
| 717 | writen(outd, (unsigned char *)name, strlen(name)); |
| 718 | len += name == NULL ? 1 : strlen(name); |
| 719 | |
| 720 | /* write extended timestamp extra field block (9 bytes) */ |
| 721 | PUT2L(head, 0x5455); /* extended timestamp signature */ |
| 722 | PUT2L(head + 2, 5); /* number of data bytes in this block */ |
| 723 | head[4] = 1; /* flag presence of mod time */ |
| 724 | PUT4L(head + 5, mtime); /* mod time */ |
| 725 | writen(outd, head, 9); /* write extra field block */ |
| 726 | len += 9; |
| 727 | } |
| 728 | else if (form) { /* zlib */ |
| 729 | head[0] = 0x78; /* deflate, 32K window */ |
| 730 | head[1] = (level == 9 ? 3 : (level == 1 ? 0 : |
| 731 | (level >= 6 || level == Z_DEFAULT_COMPRESSION ? 1 : 2))) << 6; |
| 732 | head[1] += 31 - (((head[0] << 8) + head[1]) % 31); |
| 733 | writen(outd, head, 2); |
| 734 | len = 2; |
| 735 | } |
| 736 | else { /* gzip */ |
| 737 | head[0] = 31; |
| 738 | head[1] = 139; |
| 739 | head[2] = 8; /* deflate */ |
| 740 | head[3] = name != NULL ? 8 : 0; |
| 741 | PUT4L(head + 4, mtime); |
| 742 | head[8] = level == 9 ? 2 : (level == 1 ? 4 : 0); |
| 743 | head[9] = 3; /* unix */ |
| 744 | writen(outd, head, 10); |
| 745 | len = 10; |
| 746 | if (name != NULL) |
| 747 | writen(outd, (unsigned char *)name, strlen(name) + 1); |
| 748 | if (name != NULL) |
| 749 | len += strlen(name) + 1; |
| 750 | } |
| 751 | return len; |
| 752 | } |
| 753 | |
| 754 | /* write a gzip, zlib, or zip trailer */ |
| 755 | local void put_trailer(unsigned long ulen, unsigned long clen, |
| 756 | unsigned long check, unsigned long head) |
| 757 | { |
| 758 | unsigned char tail[46]; |
| 759 | |
| 760 | if (form > 1) { /* zip */ |
| 761 | unsigned long cent; |
| 762 | |
| 763 | /* write data descriptor (as promised in local header) */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 764 | PUT4L(tail, 0x08074b50UL); |
| 765 | PUT4L(tail + 4, check); |
| 766 | PUT4L(tail + 8, clen); |
| 767 | PUT4L(tail + 12, ulen); |
| 768 | writen(outd, tail, 16); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 769 | |
| 770 | /* write central file header */ |
| 771 | PUT4L(tail, 0x02014b50UL); /* central header signature */ |
| 772 | tail[4] = 63; /* obeyed version 6.3 of the zip spec */ |
| 773 | tail[5] = 255; /* ignore external attributes */ |
| 774 | PUT2L(tail + 6, 20); /* version needed to extract (2.0) */ |
| 775 | PUT2L(tail + 8, 8); /* data descriptor is present */ |
| 776 | PUT2L(tail + 10, 8); /* deflate */ |
| 777 | PUT4L(tail + 12, time2dos(mtime)); |
| 778 | PUT4L(tail + 16, check); /* crc */ |
| 779 | PUT4L(tail + 20, clen); /* compressed length */ |
| 780 | PUT4L(tail + 24, ulen); /* uncompressed length */ |
| 781 | PUT2L(tail + 28, name == NULL ? 1 : strlen(name)); /* name length */ |
| 782 | PUT2L(tail + 30, 9); /* length of extra field (see below) */ |
| 783 | PUT2L(tail + 32, 0); /* no file comment */ |
| 784 | PUT2L(tail + 34, 0); /* disk number 0 */ |
| 785 | PUT2L(tail + 36, 0); /* internal file attributes */ |
| 786 | PUT4L(tail + 38, 0); /* external file attributes (ignored) */ |
| 787 | PUT4L(tail + 42, 0); /* offset of local header */ |
| 788 | writen(outd, tail, 46); /* write central file header */ |
| 789 | cent = 46; |
| 790 | |
| 791 | /* write file name (use "-" for stdin) */ |
| 792 | if (name == NULL) |
| 793 | writen(outd, (unsigned char *)"-", 1); |
| 794 | else |
| 795 | writen(outd, (unsigned char *)name, strlen(name)); |
| 796 | cent += name == NULL ? 1 : strlen(name); |
| 797 | |
| 798 | /* write extended timestamp extra field block (9 bytes) */ |
| 799 | PUT2L(tail, 0x5455); /* extended timestamp signature */ |
| 800 | PUT2L(tail + 2, 5); /* number of data bytes in this block */ |
| 801 | tail[4] = 1; /* flag presence of mod time */ |
| 802 | PUT4L(tail + 5, mtime); /* mod time */ |
| 803 | writen(outd, tail, 9); /* write extra field block */ |
| 804 | cent += 9; |
| 805 | |
| 806 | /* write end of central directory record */ |
| 807 | PUT4L(tail, 0x06054b50UL); /* end of central directory signature */ |
| 808 | PUT2L(tail + 4, 0); /* number of this disk */ |
| 809 | PUT2L(tail + 6, 0); /* disk with start of central directory */ |
| 810 | PUT2L(tail + 8, 1); /* number of entries on this disk */ |
| 811 | PUT2L(tail + 10, 1); /* total number of entries */ |
| 812 | PUT4L(tail + 12, cent); /* size of central directory */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 813 | PUT4L(tail + 16, head + clen + 16); /* offset of central directory */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 814 | PUT2L(tail + 20, 0); /* no zip file comment */ |
| 815 | writen(outd, tail, 22); /* write end of central directory record */ |
| 816 | } |
| 817 | else if (form) { /* zlib */ |
| 818 | PUT4M(tail, check); |
| 819 | writen(outd, tail, 4); |
| 820 | } |
| 821 | else { /* gzip */ |
| 822 | PUT4L(tail, check); |
| 823 | PUT4L(tail + 4, ulen); |
| 824 | writen(outd, tail, 8); |
| 825 | } |
| 826 | } |
| 827 | |
| 828 | /* compute check value depending on format */ |
| 829 | #define CHECK(a,b,c) (form == 1 ? adler32(a,b,c) : crc32(a,b,c)) |
| 830 | |
| 831 | #ifndef NOTHREAD |
| 832 | /* -- threaded portions of pigz -- */ |
| 833 | |
| 834 | /* -- check value combination routines for parallel calculation -- */ |
| 835 | |
| 836 | #define COMB(a,b,c) (form == 1 ? adler32_comb(a,b,c) : crc32_comb(a,b,c)) |
| 837 | /* combine two crc-32's or two adler-32's (copied from zlib 1.2.3 so that pigz |
| 838 | can be compatible with older versions of zlib) */ |
| 839 | |
| 840 | /* we copy the combination routines from zlib here, in order to avoid |
| 841 | linkage issues with the zlib 1.2.3 builds on Sun, Ubuntu, and others */ |
| 842 | |
| 843 | local unsigned long gf2_matrix_times(unsigned long *mat, unsigned long vec) |
| 844 | { |
| 845 | unsigned long sum; |
| 846 | |
| 847 | sum = 0; |
| 848 | while (vec) { |
| 849 | if (vec & 1) |
| 850 | sum ^= *mat; |
| 851 | vec >>= 1; |
| 852 | mat++; |
| 853 | } |
| 854 | return sum; |
| 855 | } |
| 856 | |
| 857 | local void gf2_matrix_square(unsigned long *square, unsigned long *mat) |
| 858 | { |
| 859 | int n; |
| 860 | |
| 861 | for (n = 0; n < 32; n++) |
| 862 | square[n] = gf2_matrix_times(mat, mat[n]); |
| 863 | } |
| 864 | |
| 865 | local unsigned long crc32_comb(unsigned long crc1, unsigned long crc2, |
| 866 | size_t len2) |
| 867 | { |
| 868 | int n; |
| 869 | unsigned long row; |
| 870 | unsigned long even[32]; /* even-power-of-two zeros operator */ |
| 871 | unsigned long odd[32]; /* odd-power-of-two zeros operator */ |
| 872 | |
| 873 | /* degenerate case */ |
| 874 | if (len2 == 0) |
| 875 | return crc1; |
| 876 | |
| 877 | /* put operator for one zero bit in odd */ |
| 878 | odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ |
| 879 | row = 1; |
| 880 | for (n = 1; n < 32; n++) { |
| 881 | odd[n] = row; |
| 882 | row <<= 1; |
| 883 | } |
| 884 | |
| 885 | /* put operator for two zero bits in even */ |
| 886 | gf2_matrix_square(even, odd); |
| 887 | |
| 888 | /* put operator for four zero bits in odd */ |
| 889 | gf2_matrix_square(odd, even); |
| 890 | |
| 891 | /* apply len2 zeros to crc1 (first square will put the operator for one |
| 892 | zero byte, eight zero bits, in even) */ |
| 893 | do { |
| 894 | /* apply zeros operator for this bit of len2 */ |
| 895 | gf2_matrix_square(even, odd); |
| 896 | if (len2 & 1) |
| 897 | crc1 = gf2_matrix_times(even, crc1); |
| 898 | len2 >>= 1; |
| 899 | |
| 900 | /* if no more bits set, then done */ |
| 901 | if (len2 == 0) |
| 902 | break; |
| 903 | |
| 904 | /* another iteration of the loop with odd and even swapped */ |
| 905 | gf2_matrix_square(odd, even); |
| 906 | if (len2 & 1) |
| 907 | crc1 = gf2_matrix_times(odd, crc1); |
| 908 | len2 >>= 1; |
| 909 | |
| 910 | /* if no more bits set, then done */ |
| 911 | } while (len2 != 0); |
| 912 | |
| 913 | /* return combined crc */ |
| 914 | crc1 ^= crc2; |
| 915 | return crc1; |
| 916 | } |
| 917 | |
| 918 | #define BASE 65521U /* largest prime smaller than 65536 */ |
| 919 | #define LOW16 0xffff /* mask lower 16 bits */ |
| 920 | |
| 921 | local unsigned long adler32_comb(unsigned long adler1, unsigned long adler2, |
| 922 | size_t len2) |
| 923 | { |
| 924 | unsigned long sum1; |
| 925 | unsigned long sum2; |
| 926 | unsigned rem; |
| 927 | |
| 928 | /* the derivation of this formula is left as an exercise for the reader */ |
| 929 | rem = (unsigned)(len2 % BASE); |
| 930 | sum1 = adler1 & LOW16; |
| 931 | sum2 = (rem * sum1) % BASE; |
| 932 | sum1 += (adler2 & LOW16) + BASE - 1; |
| 933 | sum2 += ((adler1 >> 16) & LOW16) + ((adler2 >> 16) & LOW16) + BASE - rem; |
| 934 | if (sum1 >= BASE) sum1 -= BASE; |
| 935 | if (sum1 >= BASE) sum1 -= BASE; |
| 936 | if (sum2 >= (BASE << 1)) sum2 -= (BASE << 1); |
| 937 | if (sum2 >= BASE) sum2 -= BASE; |
| 938 | return sum1 | (sum2 << 16); |
| 939 | } |
| 940 | |
| 941 | /* -- pool of spaces for buffer management -- */ |
| 942 | |
| 943 | /* These routines manage a pool of spaces. Each pool specifies a fixed size |
| 944 | buffer to be contained in each space. Each space has a use count, which |
| 945 | when decremented to zero returns the space to the pool. If a space is |
| 946 | requested from the pool and the pool is empty, a space is immediately |
| 947 | created unless a specified limit on the number of spaces has been reached. |
| 948 | Only if the limit is reached will it wait for a space to be returned to the |
| 949 | pool. Each space knows what pool it belongs to, so that it can be returned. |
| 950 | */ |
| 951 | |
| 952 | /* a space (one buffer for each space) */ |
| 953 | struct space { |
| 954 | lock *use; /* use count -- return to pool when zero */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 955 | unsigned char *buf; /* buffer of size size */ |
| 956 | size_t size; /* current size of this buffer */ |
| 957 | size_t len; /* for application usage (initially zero) */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 958 | struct pool *pool; /* pool to return to */ |
| 959 | struct space *next; /* for pool linked list */ |
| 960 | }; |
| 961 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 962 | /* pool of spaces (one pool for each type needed) */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 963 | struct pool { |
| 964 | lock *have; /* unused spaces available, lock for list */ |
| 965 | struct space *head; /* linked list of available buffers */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 966 | size_t size; /* size of new buffers in this pool */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 967 | int limit; /* number of new spaces allowed, or -1 */ |
| 968 | int made; /* number of buffers made */ |
| 969 | }; |
| 970 | |
| 971 | /* initialize a pool (pool structure itself provided, not allocated) -- the |
| 972 | limit is the maximum number of spaces in the pool, or -1 to indicate no |
| 973 | limit, i.e., to never wait for a buffer to return to the pool */ |
| 974 | local void new_pool(struct pool *pool, size_t size, int limit) |
| 975 | { |
| 976 | pool->have = new_lock(0); |
| 977 | pool->head = NULL; |
| 978 | pool->size = size; |
| 979 | pool->limit = limit; |
| 980 | pool->made = 0; |
| 981 | } |
| 982 | |
| 983 | /* get a space from a pool -- the use count is initially set to one, so there |
| 984 | is no need to call use_space() for the first use */ |
| 985 | local struct space *get_space(struct pool *pool) |
| 986 | { |
| 987 | struct space *space; |
| 988 | |
| 989 | /* if can't create any more, wait for a space to show up */ |
| 990 | possess(pool->have); |
| 991 | if (pool->limit == 0) |
| 992 | wait_for(pool->have, NOT_TO_BE, 0); |
| 993 | |
| 994 | /* if a space is available, pull it from the list and return it */ |
| 995 | if (pool->head != NULL) { |
| 996 | space = pool->head; |
| 997 | possess(space->use); |
| 998 | pool->head = space->next; |
| 999 | twist(pool->have, BY, -1); /* one less in pool */ |
| 1000 | twist(space->use, TO, 1); /* initially one user */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1001 | space->len = 0; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1002 | return space; |
| 1003 | } |
| 1004 | |
| 1005 | /* nothing available, don't want to wait, make a new space */ |
| 1006 | assert(pool->limit != 0); |
| 1007 | if (pool->limit > 0) |
| 1008 | pool->limit--; |
| 1009 | pool->made++; |
| 1010 | release(pool->have); |
| 1011 | space = malloc(sizeof(struct space)); |
| 1012 | if (space == NULL) |
| 1013 | bail("not enough memory", ""); |
| 1014 | space->use = new_lock(1); /* initially one user */ |
| 1015 | space->buf = malloc(pool->size); |
| 1016 | if (space->buf == NULL) |
| 1017 | bail("not enough memory", ""); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1018 | space->size = pool->size; |
| 1019 | space->len = 0; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1020 | space->pool = pool; /* remember the pool this belongs to */ |
| 1021 | return space; |
| 1022 | } |
| 1023 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1024 | /* compute next size up by multiplying by about 2**(1/3) and round to the next |
| 1025 | power of 2 if we're close (so three applications results in doubling) -- if |
| 1026 | small, go up to at least 16, if overflow, go to max size_t value */ |
| 1027 | local size_t grow(size_t size) |
| 1028 | { |
| 1029 | size_t was, top; |
| 1030 | int shift; |
| 1031 | |
| 1032 | was = size; |
| 1033 | size += size >> 2; |
| 1034 | top = size; |
| 1035 | for (shift = 0; top > 7; shift++) |
| 1036 | top >>= 1; |
| 1037 | if (top == 7) |
| 1038 | size = (size_t)1 << (shift + 3); |
| 1039 | if (size < 16) |
| 1040 | size = 16; |
| 1041 | if (size <= was) |
| 1042 | size = (size_t)0 - 1; |
| 1043 | return size; |
| 1044 | } |
| 1045 | |
| 1046 | /* increase the size of the buffer in space */ |
| 1047 | local void grow_space(struct space *space) |
| 1048 | { |
| 1049 | size_t more; |
| 1050 | |
| 1051 | /* compute next size up */ |
| 1052 | more = grow(space->size); |
| 1053 | if (more == space->size) |
| 1054 | bail("not enough memory", ""); |
| 1055 | |
| 1056 | /* reallocate the buffer */ |
| 1057 | space->buf = realloc(space->buf, more); |
| 1058 | if (space->buf == NULL) |
| 1059 | bail("not enough memory", ""); |
| 1060 | space->size = more; |
| 1061 | } |
| 1062 | |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1063 | /* increment the use count to require one more drop before returning this space |
| 1064 | to the pool */ |
| 1065 | local void use_space(struct space *space) |
| 1066 | { |
| 1067 | possess(space->use); |
| 1068 | twist(space->use, BY, +1); |
| 1069 | } |
| 1070 | |
| 1071 | /* drop a space, returning it to the pool if the use count is zero */ |
| 1072 | local void drop_space(struct space *space) |
| 1073 | { |
| 1074 | int use; |
| 1075 | struct pool *pool; |
| 1076 | |
| 1077 | possess(space->use); |
| 1078 | use = peek_lock(space->use); |
| 1079 | assert(use != 0); |
| 1080 | if (use == 1) { |
| 1081 | pool = space->pool; |
| 1082 | possess(pool->have); |
| 1083 | space->next = pool->head; |
| 1084 | pool->head = space; |
| 1085 | twist(pool->have, BY, +1); |
| 1086 | } |
| 1087 | twist(space->use, BY, -1); |
| 1088 | } |
| 1089 | |
| 1090 | /* free the memory and lock resources of a pool -- return number of spaces for |
| 1091 | debugging and resource usage measurement */ |
| 1092 | local int free_pool(struct pool *pool) |
| 1093 | { |
| 1094 | int count; |
| 1095 | struct space *space; |
| 1096 | |
| 1097 | possess(pool->have); |
| 1098 | count = 0; |
| 1099 | while ((space = pool->head) != NULL) { |
| 1100 | pool->head = space->next; |
| 1101 | free(space->buf); |
| 1102 | free_lock(space->use); |
| 1103 | free(space); |
| 1104 | count++; |
| 1105 | } |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1106 | assert(count == pool->made); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1107 | release(pool->have); |
| 1108 | free_lock(pool->have); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1109 | return count; |
| 1110 | } |
| 1111 | |
| 1112 | /* input and output buffer pools */ |
| 1113 | local struct pool in_pool; |
| 1114 | local struct pool out_pool; |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1115 | local struct pool dict_pool; |
| 1116 | local struct pool lens_pool; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1117 | |
| 1118 | /* -- parallel compression -- */ |
| 1119 | |
| 1120 | /* compress or write job (passed from compress list to write list) -- if seq is |
| 1121 | equal to -1, compress_thread is instructed to return; if more is false then |
| 1122 | this is the last chunk, which after writing tells write_thread to return */ |
| 1123 | struct job { |
| 1124 | long seq; /* sequence number */ |
| 1125 | int more; /* true if this is not the last chunk */ |
| 1126 | struct space *in; /* input data to compress */ |
| 1127 | struct space *out; /* dictionary or resulting compressed data */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1128 | struct space *lens; /* coded list of flush block lengths */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1129 | unsigned long check; /* check value for input data */ |
| 1130 | lock *calc; /* released when check calculation complete */ |
| 1131 | struct job *next; /* next job in the list (either list) */ |
| 1132 | }; |
| 1133 | |
| 1134 | /* list of compress jobs (with tail for appending to list) */ |
| 1135 | local lock *compress_have = NULL; /* number of compress jobs waiting */ |
| 1136 | local struct job *compress_head, **compress_tail; |
| 1137 | |
| 1138 | /* list of write jobs */ |
| 1139 | local lock *write_first; /* lowest sequence number in list */ |
| 1140 | local struct job *write_head; |
| 1141 | |
| 1142 | /* number of compression threads running */ |
| 1143 | local int cthreads = 0; |
| 1144 | |
| 1145 | /* write thread if running */ |
| 1146 | local thread *writeth = NULL; |
| 1147 | |
| 1148 | /* setup job lists (call from main thread) */ |
| 1149 | local void setup_jobs(void) |
| 1150 | { |
| 1151 | /* set up only if not already set up*/ |
| 1152 | if (compress_have != NULL) |
| 1153 | return; |
| 1154 | |
| 1155 | /* allocate locks and initialize lists */ |
| 1156 | compress_have = new_lock(0); |
| 1157 | compress_head = NULL; |
| 1158 | compress_tail = &compress_head; |
| 1159 | write_first = new_lock(-1); |
| 1160 | write_head = NULL; |
| 1161 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1162 | /* initialize buffer pools (initial size for out_pool not critical, since |
| 1163 | buffers will be grown in size if needed -- initial size chosen to make |
| 1164 | this unlikely -- same for lens_pool) */ |
| 1165 | new_pool(&in_pool, size, INBUFS(procs)); |
| 1166 | new_pool(&out_pool, OUTPOOL(size), -1); |
| 1167 | new_pool(&dict_pool, DICT, -1); |
| 1168 | new_pool(&lens_pool, size >> (RSYNCBITS - 1), -1); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1169 | } |
| 1170 | |
| 1171 | /* command the compress threads to all return, then join them all (call from |
| 1172 | main thread), free all the thread-related resources */ |
| 1173 | local void finish_jobs(void) |
| 1174 | { |
| 1175 | struct job job; |
| 1176 | int caught; |
| 1177 | |
| 1178 | /* only do this once */ |
| 1179 | if (compress_have == NULL) |
| 1180 | return; |
| 1181 | |
| 1182 | /* command all of the extant compress threads to return */ |
| 1183 | possess(compress_have); |
| 1184 | job.seq = -1; |
| 1185 | job.next = NULL; |
| 1186 | compress_head = &job; |
| 1187 | compress_tail = &(job.next); |
| 1188 | twist(compress_have, BY, +1); /* will wake them all up */ |
| 1189 | |
| 1190 | /* join all of the compress threads, verify they all came back */ |
| 1191 | caught = join_all(); |
| 1192 | Trace(("-- joined %d compress threads", caught)); |
| 1193 | assert(caught == cthreads); |
| 1194 | cthreads = 0; |
| 1195 | |
| 1196 | /* free the resources */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1197 | caught = free_pool(&lens_pool); |
| 1198 | Trace(("-- freed %d block lengths buffers", caught)); |
| 1199 | caught = free_pool(&dict_pool); |
| 1200 | Trace(("-- freed %d dictionary buffers", caught)); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1201 | caught = free_pool(&out_pool); |
| 1202 | Trace(("-- freed %d output buffers", caught)); |
| 1203 | caught = free_pool(&in_pool); |
| 1204 | Trace(("-- freed %d input buffers", caught)); |
| 1205 | free_lock(write_first); |
| 1206 | free_lock(compress_have); |
| 1207 | compress_have = NULL; |
| 1208 | } |
| 1209 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1210 | /* compress all strm->avail_in bytes at strm->next_in to out->buf, updating |
| 1211 | out->len, grow the size of the buffer (out->size) if necessary -- respect |
| 1212 | the size limitations of the zlib stream data types (size_t may be larger |
| 1213 | than unsigned) */ |
| 1214 | local void deflate_engine(z_stream *strm, struct space *out, int flush) |
| 1215 | { |
| 1216 | size_t room; |
| 1217 | |
| 1218 | do { |
| 1219 | room = out->size - out->len; |
| 1220 | if (room == 0) { |
| 1221 | grow_space(out); |
| 1222 | room = out->size - out->len; |
| 1223 | } |
| 1224 | strm->next_out = out->buf + out->len; |
| 1225 | strm->avail_out = room < UINT_MAX ? (unsigned)room : UINT_MAX; |
| 1226 | (void)deflate(strm, flush); |
| 1227 | out->len = strm->next_out - out->buf; |
| 1228 | } while (strm->avail_out == 0); |
| 1229 | assert(strm->avail_in == 0); |
| 1230 | } |
| 1231 | |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1232 | /* get the next compression job from the head of the list, compress and compute |
| 1233 | the check value on the input, and put a job in the write list with the |
| 1234 | results -- keep looking for more jobs, returning when a job is found with a |
| 1235 | sequence number of -1 (leave that job in the list for other incarnations to |
| 1236 | find) */ |
| 1237 | local void compress_thread(void *dummy) |
| 1238 | { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1239 | struct job *job; /* job pulled and working on */ |
| 1240 | struct job *here, **prior; /* pointers for inserting in write list */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1241 | unsigned long check; /* check value of input */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1242 | unsigned char *next; /* pointer for blocks, check value data */ |
| 1243 | size_t left; /* input left to process */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1244 | size_t len; /* remaining bytes to compress/check */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1245 | #if ZLIB_VERNUM >= 0x1260 |
| 1246 | int bits; /* deflate pending bits */ |
| 1247 | #endif |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1248 | z_stream strm; /* deflate stream */ |
| 1249 | |
| 1250 | (void)dummy; |
| 1251 | |
| 1252 | /* initialize the deflate stream for this thread */ |
| 1253 | strm.zfree = Z_NULL; |
| 1254 | strm.zalloc = Z_NULL; |
| 1255 | strm.opaque = Z_NULL; |
| 1256 | if (deflateInit2(&strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != |
| 1257 | Z_OK) |
| 1258 | bail("not enough memory", ""); |
| 1259 | |
| 1260 | /* keep looking for work */ |
| 1261 | for (;;) { |
| 1262 | /* get a job (like I tell my son) */ |
| 1263 | possess(compress_have); |
| 1264 | wait_for(compress_have, NOT_TO_BE, 0); |
| 1265 | job = compress_head; |
| 1266 | assert(job != NULL); |
| 1267 | if (job->seq == -1) |
| 1268 | break; |
| 1269 | compress_head = job->next; |
| 1270 | if (job->next == NULL) |
| 1271 | compress_tail = &compress_head; |
| 1272 | twist(compress_have, BY, -1); |
| 1273 | |
| 1274 | /* got a job -- initialize and set the compression level (note that if |
| 1275 | deflateParams() is called immediately after deflateReset(), there is |
| 1276 | no need to initialize the input/output for the stream) */ |
| 1277 | Trace(("-- compressing #%ld", job->seq)); |
| 1278 | (void)deflateReset(&strm); |
| 1279 | (void)deflateParams(&strm, level, Z_DEFAULT_STRATEGY); |
| 1280 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1281 | /* set dictionary if provided, release that input or dictionary buffer |
| 1282 | (not NULL if dict is true and if this is not the first work unit) */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1283 | if (job->out != NULL) { |
| 1284 | len = job->out->len; |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1285 | left = len < DICT ? len : DICT; |
| 1286 | deflateSetDictionary(&strm, job->out->buf + (len - left), left); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1287 | drop_space(job->out); |
| 1288 | } |
| 1289 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1290 | /* set up input and output */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1291 | job->out = get_space(&out_pool); |
| 1292 | strm.next_in = job->in->buf; |
| 1293 | strm.next_out = job->out->buf; |
| 1294 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1295 | /* compress each block, either flushing or finishing */ |
| 1296 | next = job->lens == NULL ? NULL : job->lens->buf; |
| 1297 | left = job->in->len; |
| 1298 | job->out->len = 0; |
| 1299 | do { |
| 1300 | /* decode next block length from blocks list */ |
| 1301 | len = next == NULL ? 128 : *next++; |
| 1302 | if (len < 128) /* 64..32831 */ |
| 1303 | len = (len << 8) + (*next++) + 64; |
| 1304 | else if (len == 128) /* end of list */ |
| 1305 | len = left; |
| 1306 | else if (len < 192) /* 1..63 */ |
| 1307 | len &= 0x3f; |
| 1308 | else { /* 32832..4227135 */ |
| 1309 | len = ((len & 0x3f) << 16) + (*next++ << 8) + 32832U; |
| 1310 | len += *next++; |
| 1311 | } |
| 1312 | left -= len; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1313 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1314 | /* run MAXP2-sized amounts of input through deflate -- this loop is |
| 1315 | needed for those cases where the unsigned type is smaller than |
| 1316 | the size_t type, or when len is close to the limit of the size_t |
| 1317 | type */ |
| 1318 | while (len > MAXP2) { |
| 1319 | strm.avail_in = MAXP2; |
| 1320 | deflate_engine(&strm, job->out, Z_NO_FLUSH); |
| 1321 | len -= MAXP2; |
| 1322 | } |
| 1323 | |
| 1324 | /* run the last piece through deflate -- end on a byte boundary, |
| 1325 | using a sync marker if necessary, or finish the deflate stream |
| 1326 | if this is the last block */ |
| 1327 | strm.avail_in = (unsigned)len; |
| 1328 | if (left || job->more) { |
| 1329 | #if ZLIB_VERNUM >= 0x1260 |
| 1330 | deflate_engine(&strm, job->out, Z_BLOCK); |
| 1331 | |
| 1332 | /* add just enough empty blocks to get to a byte boundary */ |
| 1333 | (void)deflatePending(&strm, Z_NULL, &bits); |
| 1334 | if (bits & 1) |
| 1335 | deflate_engine(&strm, job->out, Z_SYNC_FLUSH); |
| 1336 | else if (bits & 7) { |
| 1337 | do { |
| 1338 | bits = deflatePrime(&strm, 10, 2); /* static empty */ |
| 1339 | assert(bits == Z_OK); |
| 1340 | (void)deflatePending(&strm, Z_NULL, &bits); |
| 1341 | } while (bits & 7); |
| 1342 | deflate_engine(&strm, job->out, Z_BLOCK); |
| 1343 | } |
| 1344 | #else |
| 1345 | deflate_engine(&strm, job->out, Z_SYNC_FLUSH); |
| 1346 | #endif |
| 1347 | } |
| 1348 | else |
| 1349 | deflate_engine(&strm, job->out, Z_FINISH); |
| 1350 | } while (left); |
| 1351 | if (job->lens != NULL) { |
| 1352 | drop_space(job->lens); |
| 1353 | job->lens = NULL; |
| 1354 | } |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1355 | Trace(("-- compressed #%ld%s", job->seq, job->more ? "" : " (last)")); |
| 1356 | |
| 1357 | /* reserve input buffer until check value has been calculated */ |
| 1358 | use_space(job->in); |
| 1359 | |
| 1360 | /* insert write job in list in sorted order, alert write thread */ |
| 1361 | possess(write_first); |
| 1362 | prior = &write_head; |
| 1363 | while ((here = *prior) != NULL) { |
| 1364 | if (here->seq > job->seq) |
| 1365 | break; |
| 1366 | prior = &(here->next); |
| 1367 | } |
| 1368 | job->next = here; |
| 1369 | *prior = job; |
| 1370 | twist(write_first, TO, write_head->seq); |
| 1371 | |
| 1372 | /* calculate the check value in parallel with writing, alert the write |
| 1373 | thread that the calculation is complete, and drop this usage of the |
| 1374 | input buffer */ |
| 1375 | len = job->in->len; |
| 1376 | next = job->in->buf; |
| 1377 | check = CHECK(0L, Z_NULL, 0); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1378 | while (len > MAXP2) { |
| 1379 | check = CHECK(check, next, MAXP2); |
| 1380 | len -= MAXP2; |
| 1381 | next += MAXP2; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1382 | } |
| 1383 | check = CHECK(check, next, (unsigned)len); |
| 1384 | drop_space(job->in); |
| 1385 | job->check = check; |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1386 | Trace(("-- checked #%ld%s", job->seq, job->more ? "" : " (last)")); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1387 | possess(job->calc); |
| 1388 | twist(job->calc, TO, 1); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1389 | |
| 1390 | /* done with that one -- go find another job */ |
| 1391 | } |
| 1392 | |
| 1393 | /* found job with seq == -1 -- free deflate memory and return to join */ |
| 1394 | release(compress_have); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1395 | (void)deflateEnd(&strm); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1396 | } |
| 1397 | |
| 1398 | /* collect the write jobs off of the list in sequence order and write out the |
| 1399 | compressed data until the last chunk is written -- also write the header and |
| 1400 | trailer and combine the individual check values of the input buffers */ |
| 1401 | local void write_thread(void *dummy) |
| 1402 | { |
| 1403 | long seq; /* next sequence number looking for */ |
| 1404 | struct job *job; /* job pulled and working on */ |
| 1405 | size_t len; /* input length */ |
| 1406 | int more; /* true if more chunks to write */ |
| 1407 | unsigned long head; /* header length */ |
| 1408 | unsigned long ulen; /* total uncompressed size (overflow ok) */ |
| 1409 | unsigned long clen; /* total compressed size (overflow ok) */ |
| 1410 | unsigned long check; /* check value of uncompressed data */ |
| 1411 | |
| 1412 | (void)dummy; |
| 1413 | |
| 1414 | /* build and write header */ |
| 1415 | Trace(("-- write thread running")); |
| 1416 | head = put_header(); |
| 1417 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1418 | /* process output of compress threads until end of input */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1419 | ulen = clen = 0; |
| 1420 | check = CHECK(0L, Z_NULL, 0); |
| 1421 | seq = 0; |
| 1422 | do { |
| 1423 | /* get next write job in order */ |
| 1424 | possess(write_first); |
| 1425 | wait_for(write_first, TO_BE, seq); |
| 1426 | job = write_head; |
| 1427 | write_head = job->next; |
| 1428 | twist(write_first, TO, write_head == NULL ? -1 : write_head->seq); |
| 1429 | |
| 1430 | /* update lengths, save uncompressed length for COMB */ |
| 1431 | more = job->more; |
| 1432 | len = job->in->len; |
| 1433 | drop_space(job->in); |
| 1434 | ulen += (unsigned long)len; |
| 1435 | clen += (unsigned long)(job->out->len); |
| 1436 | |
| 1437 | /* write the compressed data and drop the output buffer */ |
| 1438 | Trace(("-- writing #%ld", seq)); |
| 1439 | writen(outd, job->out->buf, job->out->len); |
| 1440 | drop_space(job->out); |
| 1441 | Trace(("-- wrote #%ld%s", seq, more ? "" : " (last)")); |
| 1442 | |
| 1443 | /* wait for check calculation to complete, then combine, once |
| 1444 | the compress thread is done with the input, release it */ |
| 1445 | possess(job->calc); |
| 1446 | wait_for(job->calc, TO_BE, 1); |
| 1447 | release(job->calc); |
| 1448 | check = COMB(check, job->check, len); |
| 1449 | |
| 1450 | /* free the job */ |
| 1451 | free_lock(job->calc); |
| 1452 | free(job); |
| 1453 | |
| 1454 | /* get the next buffer in sequence */ |
| 1455 | seq++; |
| 1456 | } while (more); |
| 1457 | |
| 1458 | /* write trailer */ |
| 1459 | put_trailer(ulen, clen, check, head); |
| 1460 | |
| 1461 | /* verify no more jobs, prepare for next use */ |
| 1462 | possess(compress_have); |
| 1463 | assert(compress_head == NULL && peek_lock(compress_have) == 0); |
| 1464 | release(compress_have); |
| 1465 | possess(write_first); |
| 1466 | assert(write_head == NULL); |
| 1467 | twist(write_first, TO, -1); |
| 1468 | } |
| 1469 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1470 | /* encode a hash hit to the block lengths list -- hit == 0 ends the list */ |
| 1471 | local void append_len(struct job *job, size_t len) |
| 1472 | { |
| 1473 | struct space *lens; |
| 1474 | |
| 1475 | assert(len < 4227136UL); |
| 1476 | if (job->lens == NULL) |
| 1477 | job->lens = get_space(&lens_pool); |
| 1478 | lens = job->lens; |
| 1479 | if (lens->size < lens->len + 3) |
| 1480 | grow_space(lens); |
| 1481 | if (len < 64) |
| 1482 | lens->buf[lens->len++] = len + 128; |
| 1483 | else if (len < 32832U) { |
| 1484 | len -= 64; |
| 1485 | lens->buf[lens->len++] = len >> 8; |
| 1486 | lens->buf[lens->len++] = len; |
| 1487 | } |
| 1488 | else { |
| 1489 | len -= 32832U; |
| 1490 | lens->buf[lens->len++] = (len >> 16) + 192; |
| 1491 | lens->buf[lens->len++] = len >> 8; |
| 1492 | lens->buf[lens->len++] = len; |
| 1493 | } |
| 1494 | } |
| 1495 | |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1496 | /* compress ind to outd, using multiple threads for the compression and check |
| 1497 | value calculations and one other thread for writing the output -- compress |
| 1498 | threads will be launched and left running (waiting actually) to support |
| 1499 | subsequent calls of parallel_compress() */ |
| 1500 | local void parallel_compress(void) |
| 1501 | { |
| 1502 | long seq; /* sequence number */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1503 | struct space *curr; /* input data to compress */ |
| 1504 | struct space *next; /* input data that follows curr */ |
| 1505 | struct space *hold; /* input data that follows next */ |
| 1506 | struct space *dict; /* dictionary for next compression */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1507 | struct job *job; /* job for compress, then write */ |
| 1508 | int more; /* true if more input to read */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1509 | unsigned hash; /* hash for rsyncable */ |
| 1510 | unsigned char *scan; /* next byte to compute hash on */ |
| 1511 | unsigned char *end; /* after end of data to compute hash on */ |
| 1512 | unsigned char *last; /* position after last hit */ |
| 1513 | size_t left; /* last hit in curr to end of curr */ |
| 1514 | size_t len; /* for various length computations */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1515 | |
| 1516 | /* if first time or after an option change, setup the job lists */ |
| 1517 | setup_jobs(); |
| 1518 | |
| 1519 | /* start write thread */ |
| 1520 | writeth = launch(write_thread, NULL); |
| 1521 | |
| 1522 | /* read from input and start compress threads (write thread will pick up |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1523 | the output of the compress threads) */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1524 | seq = 0; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1525 | next = get_space(&in_pool); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1526 | next->len = readn(ind, next->buf, next->size); |
| 1527 | hold = NULL; |
| 1528 | dict = NULL; |
| 1529 | scan = next->buf; |
| 1530 | hash = RSYNCHIT; |
| 1531 | left = 0; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1532 | do { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1533 | /* create a new job */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1534 | job = malloc(sizeof(struct job)); |
| 1535 | if (job == NULL) |
| 1536 | bail("not enough memory", ""); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1537 | job->calc = new_lock(0); |
| 1538 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1539 | /* update input spaces */ |
| 1540 | curr = next; |
| 1541 | next = hold; |
| 1542 | hold = NULL; |
| 1543 | |
| 1544 | /* get more input if we don't already have some */ |
| 1545 | if (next == NULL) { |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1546 | next = get_space(&in_pool); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1547 | next->len = readn(ind, next->buf, next->size); |
| 1548 | } |
| 1549 | |
| 1550 | /* if rsyncable, generate block lengths and prepare curr for job to |
| 1551 | likely have less than size bytes (up to the last hash hit) */ |
| 1552 | job->lens = NULL; |
| 1553 | if (rsync && curr->len) { |
| 1554 | /* compute the hash function starting where we last left off to |
| 1555 | cover either size bytes or to EOF, whichever is less, through |
| 1556 | the data in curr (and in the next loop, through next) -- save |
| 1557 | the block lengths resulting from the hash hits in the job->lens |
| 1558 | list */ |
| 1559 | if (left == 0) { |
| 1560 | /* scan is in curr */ |
| 1561 | last = curr->buf; |
| 1562 | end = curr->buf + curr->len; |
| 1563 | while (scan < end) { |
| 1564 | hash = ((hash << 1) ^ *scan++) & RSYNCMASK; |
| 1565 | if (hash == RSYNCHIT) { |
| 1566 | len = scan - last; |
| 1567 | append_len(job, len); |
| 1568 | last = scan; |
| 1569 | } |
| 1570 | } |
| 1571 | |
| 1572 | /* continue scan in next */ |
| 1573 | left = scan - last; |
| 1574 | scan = next->buf; |
| 1575 | } |
| 1576 | |
| 1577 | /* scan in next for enough bytes to fill curr, or what is available |
| 1578 | in next, whichever is less (if next isn't full, then we're at |
| 1579 | the end of the file) -- the bytes in curr since the last hit, |
| 1580 | stored in left, counts towards the size of the first block */ |
| 1581 | last = next->buf; |
| 1582 | len = curr->size - curr->len; |
| 1583 | if (len > next->len) |
| 1584 | len = next->len; |
| 1585 | end = next->buf + len; |
| 1586 | while (scan < end) { |
| 1587 | hash = ((hash << 1) ^ *scan++) & RSYNCMASK; |
| 1588 | if (hash == RSYNCHIT) { |
| 1589 | len = (scan - last) + left; |
| 1590 | left = 0; |
| 1591 | append_len(job, len); |
| 1592 | last = scan; |
| 1593 | } |
| 1594 | } |
| 1595 | append_len(job, 0); |
| 1596 | |
| 1597 | /* create input in curr for job up to last hit or entire buffer if |
| 1598 | no hits at all -- save remainder in next and possibly hold */ |
| 1599 | len = (job->lens->len == 1 ? scan : last) - next->buf; |
| 1600 | if (len) { |
| 1601 | /* got hits in next, or no hits in either -- copy to curr */ |
| 1602 | memcpy(curr->buf + curr->len, next->buf, len); |
| 1603 | curr->len += len; |
| 1604 | memmove(next->buf, next->buf + len, next->len - len); |
| 1605 | next->len -= len; |
| 1606 | scan -= len; |
| 1607 | left = 0; |
| 1608 | } |
| 1609 | else if (job->lens->len != 1 && left && next->len) { |
| 1610 | /* had hits in curr, but none in next, and last hit in curr |
| 1611 | wasn't right at the end, so we have input there to save -- |
| 1612 | use curr up to the last hit, save the rest, moving next to |
| 1613 | hold */ |
| 1614 | hold = next; |
| 1615 | next = get_space(&in_pool); |
| 1616 | memcpy(next->buf, curr->buf + (curr->len - left), left); |
| 1617 | next->len = left; |
| 1618 | curr->len -= left; |
| 1619 | } |
| 1620 | else { |
| 1621 | /* else, last match happened to be right at the end of curr, |
| 1622 | or we're at the end of the input compressing the rest */ |
| 1623 | left = 0; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1624 | } |
| 1625 | } |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1626 | |
| 1627 | /* compress curr->buf to curr->len -- compress thread will drop curr */ |
| 1628 | job->in = curr; |
| 1629 | |
| 1630 | /* set job->more if there is more to compress after curr */ |
| 1631 | more = next->len != 0; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1632 | job->more = more; |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1633 | |
| 1634 | /* provide dictionary for this job, prepare dictionary for next job */ |
| 1635 | job->out = dict; |
| 1636 | if (more && setdict) { |
| 1637 | if (curr->len >= DICT || job->out == NULL) { |
| 1638 | dict = curr; |
| 1639 | use_space(dict); |
| 1640 | } |
| 1641 | else { |
| 1642 | dict = get_space(&dict_pool); |
| 1643 | len = DICT - curr->len; |
| 1644 | memcpy(dict->buf, job->out->buf + (job->out->len - len), len); |
| 1645 | memcpy(dict->buf + len, curr->buf, curr->len); |
| 1646 | dict->len = DICT; |
| 1647 | } |
| 1648 | } |
| 1649 | |
| 1650 | /* preparation of job is complete */ |
| 1651 | job->seq = seq; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1652 | Trace(("-- read #%ld%s", seq, more ? "" : " (last)")); |
| 1653 | if (++seq < 1) |
| 1654 | bail("input too long: ", in); |
| 1655 | |
| 1656 | /* start another compress thread if needed */ |
| 1657 | if (cthreads < seq && cthreads < procs) { |
| 1658 | (void)launch(compress_thread, NULL); |
| 1659 | cthreads++; |
| 1660 | } |
| 1661 | |
| 1662 | /* put job at end of compress list, let all the compressors know */ |
| 1663 | possess(compress_have); |
| 1664 | job->next = NULL; |
| 1665 | *compress_tail = job; |
| 1666 | compress_tail = &(job->next); |
| 1667 | twist(compress_have, BY, +1); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1668 | } while (more); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1669 | drop_space(next); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1670 | |
| 1671 | /* wait for the write thread to complete (we leave the compress threads out |
| 1672 | there and waiting in case there is another stream to compress) */ |
| 1673 | join(writeth); |
| 1674 | writeth = NULL; |
| 1675 | Trace(("-- write thread joined")); |
| 1676 | } |
| 1677 | |
| 1678 | #endif |
| 1679 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1680 | /* repeated code in single_compress to compress available input and write it */ |
| 1681 | #define DEFLATE_WRITE(flush) \ |
| 1682 | do { \ |
| 1683 | do { \ |
| 1684 | strm->avail_out = out_size; \ |
| 1685 | strm->next_out = out; \ |
| 1686 | (void)deflate(strm, flush); \ |
| 1687 | writen(outd, out, out_size - strm->avail_out); \ |
| 1688 | clen += out_size - strm->avail_out; \ |
| 1689 | } while (strm->avail_out == 0); \ |
| 1690 | assert(strm->avail_in == 0); \ |
| 1691 | } while (0) |
| 1692 | |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1693 | /* do a simple compression in a single thread from ind to outd -- if reset is |
| 1694 | true, instead free the memory that was allocated and retained for input, |
| 1695 | output, and deflate */ |
| 1696 | local void single_compress(int reset) |
| 1697 | { |
| 1698 | size_t got; /* amount read */ |
| 1699 | size_t more; /* amount of next read (0 if eof) */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1700 | size_t start; /* start of next read */ |
| 1701 | size_t block; /* bytes in current block for -i */ |
| 1702 | unsigned hash; /* hash for rsyncable */ |
| 1703 | #if ZLIB_VERNUM >= 0x1260 |
| 1704 | int bits; /* deflate pending bits */ |
| 1705 | #endif |
| 1706 | unsigned char *scan; /* pointer for hash computation */ |
| 1707 | size_t left; /* bytes left to compress after hash hit */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1708 | unsigned long head; /* header length */ |
| 1709 | unsigned long ulen; /* total uncompressed size (overflow ok) */ |
| 1710 | unsigned long clen; /* total compressed size (overflow ok) */ |
| 1711 | unsigned long check; /* check value of uncompressed data */ |
| 1712 | static unsigned out_size; /* size of output buffer */ |
| 1713 | static unsigned char *in, *next, *out; /* reused i/o buffers */ |
| 1714 | static z_stream *strm = NULL; /* reused deflate structure */ |
| 1715 | |
| 1716 | /* if requested, just release the allocations and return */ |
| 1717 | if (reset) { |
| 1718 | if (strm != NULL) { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1719 | (void)deflateEnd(strm); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1720 | free(strm); |
| 1721 | free(out); |
| 1722 | free(next); |
| 1723 | free(in); |
| 1724 | strm = NULL; |
| 1725 | } |
| 1726 | return; |
| 1727 | } |
| 1728 | |
| 1729 | /* initialize the deflate structure if this is the first time */ |
| 1730 | if (strm == NULL) { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1731 | out_size = size > MAXP2 ? MAXP2 : (unsigned)size; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1732 | if ((in = malloc(size)) == NULL || |
| 1733 | (next = malloc(size)) == NULL || |
| 1734 | (out = malloc(out_size)) == NULL || |
| 1735 | (strm = malloc(sizeof(z_stream))) == NULL) |
| 1736 | bail("not enough memory", ""); |
| 1737 | strm->zfree = Z_NULL; |
| 1738 | strm->zalloc = Z_NULL; |
| 1739 | strm->opaque = Z_NULL; |
| 1740 | if (deflateInit2(strm, level, Z_DEFLATED, -15, 8, |
| 1741 | Z_DEFAULT_STRATEGY) != Z_OK) |
| 1742 | bail("not enough memory", ""); |
| 1743 | } |
| 1744 | |
| 1745 | /* write header */ |
| 1746 | head = put_header(); |
| 1747 | |
| 1748 | /* set compression level in case it changed */ |
| 1749 | (void)deflateReset(strm); |
| 1750 | (void)deflateParams(strm, level, Z_DEFAULT_STRATEGY); |
| 1751 | |
| 1752 | /* do raw deflate and calculate check value */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1753 | got = 0; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1754 | more = readn(ind, next, size); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1755 | ulen = (unsigned)more; |
| 1756 | start = 0; |
| 1757 | clen = 0; |
| 1758 | block = 0; |
| 1759 | check = CHECK(0L, Z_NULL, 0); |
| 1760 | hash = RSYNCHIT; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1761 | do { |
| 1762 | /* get data to compress, see if there is any more input */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1763 | if (got == 0) { |
| 1764 | scan = in; in = next; next = scan; |
| 1765 | strm->next_in = in + start; |
| 1766 | got = more; |
| 1767 | more = readn(ind, next, size); |
| 1768 | ulen += (unsigned long)more; |
| 1769 | start = 0; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1770 | } |
| 1771 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1772 | /* if rsyncable, compute hash until a hit or the end of the block */ |
| 1773 | left = 0; |
| 1774 | if (rsync && got) { |
| 1775 | scan = strm->next_in; |
| 1776 | left = got; |
| 1777 | do { |
| 1778 | if (left == 0) { |
| 1779 | /* went to the end -- if no more or no hit in size bytes, |
| 1780 | then proceed to do a flush or finish with got bytes */ |
| 1781 | if (more == 0 || got == size) |
| 1782 | break; |
| 1783 | |
| 1784 | /* fill in[] with what's left there and as much as possible |
| 1785 | from next[] -- set up to continue hash hit search */ |
| 1786 | memmove(in, strm->next_in, got); |
| 1787 | strm->next_in = in; |
| 1788 | scan = in + got; |
| 1789 | left = more > size - got ? size - got : more; |
| 1790 | memcpy(scan, next + start, left); |
| 1791 | got += left; |
| 1792 | more -= left; |
| 1793 | start += left; |
| 1794 | |
| 1795 | /* if that emptied the next buffer, try to refill it */ |
| 1796 | if (more == 0) { |
| 1797 | more = readn(ind, next, size); |
| 1798 | ulen += (unsigned long)more; |
| 1799 | start = 0; |
| 1800 | } |
| 1801 | } |
| 1802 | left--; |
| 1803 | hash = ((hash << 1) ^ *scan++) & RSYNCMASK; |
| 1804 | } while (hash != RSYNCHIT); |
| 1805 | got -= left; |
| 1806 | } |
| 1807 | |
| 1808 | /* clear history for --independent option */ |
| 1809 | if (!setdict) { |
| 1810 | block += got; |
| 1811 | if (block > size) { |
| 1812 | (void)deflateReset(strm); |
| 1813 | block = got; |
| 1814 | } |
| 1815 | } |
| 1816 | |
| 1817 | /* compress MAXP2-size chunks in case unsigned type is small */ |
| 1818 | while (got > MAXP2) { |
| 1819 | strm->avail_in = MAXP2; |
| 1820 | check = CHECK(check, strm->next_in, strm->avail_in); |
| 1821 | DEFLATE_WRITE(Z_NO_FLUSH); |
| 1822 | got -= MAXP2; |
| 1823 | } |
| 1824 | |
| 1825 | /* compress the remainder, emit a block -- finish if end of input */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1826 | strm->avail_in = (unsigned)got; |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1827 | got = left; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1828 | check = CHECK(check, strm->next_in, strm->avail_in); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1829 | if (more || got) { |
| 1830 | #if ZLIB_VERNUM >= 0x1260 |
| 1831 | DEFLATE_WRITE(Z_BLOCK); |
| 1832 | (void)deflatePending(strm, Z_NULL, &bits); |
| 1833 | if (bits & 1) |
| 1834 | DEFLATE_WRITE(Z_SYNC_FLUSH); |
| 1835 | else if (bits & 7) { |
| 1836 | do { |
| 1837 | bits = deflatePrime(strm, 10, 2); |
| 1838 | assert(bits == Z_OK); |
| 1839 | (void)deflatePending(strm, Z_NULL, &bits); |
| 1840 | } while (bits & 7); |
| 1841 | DEFLATE_WRITE(Z_NO_FLUSH); |
| 1842 | } |
| 1843 | #else |
| 1844 | DEFLATE_WRITE(Z_SYNC_FLUSH); |
| 1845 | #endif |
| 1846 | } |
| 1847 | else |
| 1848 | DEFLATE_WRITE(Z_FINISH); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1849 | |
| 1850 | /* do until no more input */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1851 | } while (more || got); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1852 | |
| 1853 | /* write trailer */ |
| 1854 | put_trailer(ulen, clen, check, head); |
| 1855 | } |
| 1856 | |
| 1857 | /* --- decompression --- */ |
| 1858 | |
| 1859 | /* globals for decompression and listing buffered reading */ |
| 1860 | #define BUF 32768U /* input buffer size */ |
| 1861 | local unsigned char in_buf[BUF]; /* input buffer */ |
| 1862 | local unsigned char *in_next; /* next unused byte in buffer */ |
| 1863 | local size_t in_left; /* number of unused bytes in buffer */ |
| 1864 | local int in_eof; /* true if reached end of file on input */ |
| 1865 | local int in_short; /* true if last read didn't fill buffer */ |
| 1866 | local off_t in_tot; /* total bytes read from input */ |
| 1867 | local off_t out_tot; /* total bytes written to output */ |
| 1868 | local unsigned long out_check; /* check value of output */ |
| 1869 | |
| 1870 | #ifndef NOTHREAD |
| 1871 | /* parallel reading */ |
| 1872 | |
| 1873 | local unsigned char in_buf2[BUF]; /* second buffer for parallel reads */ |
| 1874 | local size_t in_len; /* data waiting in next buffer */ |
| 1875 | local int in_which; /* -1: start, 0: in_buf2, 1: in_buf */ |
| 1876 | local lock *load_state; /* value = 0 to wait, 1 to read a buffer */ |
| 1877 | local thread *load_thread; /* load_read() thread for joining */ |
| 1878 | |
| 1879 | /* parallel read thread */ |
| 1880 | local void load_read(void *dummy) |
| 1881 | { |
| 1882 | size_t len; |
| 1883 | |
| 1884 | (void)dummy; |
| 1885 | |
| 1886 | Trace(("-- launched decompress read thread")); |
| 1887 | do { |
| 1888 | possess(load_state); |
| 1889 | wait_for(load_state, TO_BE, 1); |
| 1890 | in_len = len = readn(ind, in_which ? in_buf : in_buf2, BUF); |
| 1891 | Trace(("-- decompress read thread read %lu bytes", len)); |
| 1892 | twist(load_state, TO, 0); |
| 1893 | } while (len == BUF); |
| 1894 | Trace(("-- exited decompress read thread")); |
| 1895 | } |
| 1896 | |
| 1897 | #endif |
| 1898 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1899 | /* load() is called when the input has been consumed in order to provide more |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1900 | input data: load the input buffer with BUF or less bytes (less if at end of |
| 1901 | file) from the file ind, set in_next to point to the in_left bytes read, |
| 1902 | update in_tot, and return in_left -- in_eof is set to true when in_left has |
| 1903 | gone to zero and there is no more data left to read from ind */ |
| 1904 | local size_t load(void) |
| 1905 | { |
| 1906 | /* if already detected end of file, do nothing */ |
| 1907 | if (in_short) { |
| 1908 | in_eof = 1; |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1909 | in_left = 0; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1910 | return 0; |
| 1911 | } |
| 1912 | |
| 1913 | #ifndef NOTHREAD |
| 1914 | /* if first time in or procs == 1, read a buffer to have something to |
| 1915 | return, otherwise wait for the previous read job to complete */ |
| 1916 | if (procs > 1) { |
| 1917 | /* if first time, fire up the read thread, ask for a read */ |
| 1918 | if (in_which == -1) { |
| 1919 | in_which = 1; |
| 1920 | load_state = new_lock(1); |
| 1921 | load_thread = launch(load_read, NULL); |
| 1922 | } |
| 1923 | |
| 1924 | /* wait for the previously requested read to complete */ |
| 1925 | possess(load_state); |
| 1926 | wait_for(load_state, TO_BE, 0); |
| 1927 | release(load_state); |
| 1928 | |
| 1929 | /* set up input buffer with the data just read */ |
| 1930 | in_next = in_which ? in_buf : in_buf2; |
| 1931 | in_left = in_len; |
| 1932 | |
| 1933 | /* if not at end of file, alert read thread to load next buffer, |
| 1934 | alternate between in_buf and in_buf2 */ |
| 1935 | if (in_len == BUF) { |
| 1936 | in_which = 1 - in_which; |
| 1937 | possess(load_state); |
| 1938 | twist(load_state, TO, 1); |
| 1939 | } |
| 1940 | |
| 1941 | /* at end of file -- join read thread (already exited), clean up */ |
| 1942 | else { |
| 1943 | join(load_thread); |
| 1944 | free_lock(load_state); |
| 1945 | in_which = -1; |
| 1946 | } |
| 1947 | } |
| 1948 | else |
| 1949 | #endif |
| 1950 | { |
| 1951 | /* don't use threads -- simply read a buffer into in_buf */ |
| 1952 | in_left = readn(ind, in_next = in_buf, BUF); |
| 1953 | } |
| 1954 | |
| 1955 | /* note end of file */ |
| 1956 | if (in_left < BUF) { |
| 1957 | in_short = 1; |
| 1958 | |
| 1959 | /* if we got bupkis, now is the time to mark eof */ |
| 1960 | if (in_left == 0) |
| 1961 | in_eof = 1; |
| 1962 | } |
| 1963 | |
| 1964 | /* update the total and return the available bytes */ |
| 1965 | in_tot += in_left; |
| 1966 | return in_left; |
| 1967 | } |
| 1968 | |
| 1969 | /* initialize for reading new input */ |
| 1970 | local void in_init(void) |
| 1971 | { |
| 1972 | in_left = 0; |
| 1973 | in_eof = 0; |
| 1974 | in_short = 0; |
| 1975 | in_tot = 0; |
| 1976 | #ifndef NOTHREAD |
| 1977 | in_which = -1; |
| 1978 | #endif |
| 1979 | } |
| 1980 | |
| 1981 | /* buffered reading macros for decompression and listing */ |
| 1982 | #define GET() (in_eof || (in_left == 0 && load() == 0) ? EOF : \ |
| 1983 | (in_left--, *in_next++)) |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1984 | #define GET2() (tmp2 = GET(), tmp2 + ((unsigned)(GET()) << 8)) |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1985 | #define GET4() (tmp4 = GET2(), tmp4 + ((unsigned long)(GET2()) << 16)) |
| 1986 | #define SKIP(dist) \ |
| 1987 | do { \ |
| 1988 | size_t togo = (dist); \ |
| 1989 | while (togo > in_left) { \ |
| 1990 | togo -= in_left; \ |
| 1991 | if (load() == 0) \ |
| 1992 | return -1; \ |
| 1993 | } \ |
| 1994 | in_left -= togo; \ |
| 1995 | in_next += togo; \ |
| 1996 | } while (0) |
| 1997 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 1998 | /* pull LSB order or MSB order integers from an unsigned char buffer */ |
| 1999 | #define PULL2L(p) ((p)[0] + ((unsigned)((p)[1]) << 8)) |
| 2000 | #define PULL4L(p) (PULL2L(p) + ((unsigned long)(PULL2L((p) + 2)) << 16)) |
| 2001 | #define PULL2M(p) (((unsigned)((p)[0]) << 8) + (p)[1]) |
| 2002 | #define PULL4M(p) (((unsigned long)(PULL2M(p)) << 16) + PULL2M((p) + 2)) |
| 2003 | |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2004 | /* convert MS-DOS date and time to a Unix time, assuming current timezone |
| 2005 | (you got a better idea?) */ |
| 2006 | local time_t dos2time(unsigned long dos) |
| 2007 | { |
| 2008 | struct tm tm; |
| 2009 | |
| 2010 | if (dos == 0) |
| 2011 | return time(NULL); |
| 2012 | tm.tm_year = ((int)(dos >> 25) & 0x7f) + 80; |
| 2013 | tm.tm_mon = ((int)(dos >> 21) & 0xf) - 1; |
| 2014 | tm.tm_mday = (int)(dos >> 16) & 0x1f; |
| 2015 | tm.tm_hour = (int)(dos >> 11) & 0x1f; |
| 2016 | tm.tm_min = (int)(dos >> 5) & 0x3f; |
| 2017 | tm.tm_sec = (int)(dos << 1) & 0x3e; |
| 2018 | tm.tm_isdst = -1; /* figure out if DST or not */ |
| 2019 | return mktime(&tm); |
| 2020 | } |
| 2021 | |
| 2022 | /* convert an unsigned 32-bit integer to signed, even if long > 32 bits */ |
| 2023 | local long tolong(unsigned long val) |
| 2024 | { |
| 2025 | return (long)(val & 0x7fffffffUL) - (long)(val & 0x80000000UL); |
| 2026 | } |
| 2027 | |
| 2028 | #define LOW32 0xffffffffUL |
| 2029 | |
| 2030 | /* process zip extra field to extract zip64 lengths and Unix mod time */ |
| 2031 | local int read_extra(unsigned len, int save) |
| 2032 | { |
| 2033 | unsigned id, size, tmp2; |
| 2034 | unsigned long tmp4; |
| 2035 | |
| 2036 | /* process extra blocks */ |
| 2037 | while (len >= 4) { |
| 2038 | id = GET2(); |
| 2039 | size = GET2(); |
| 2040 | if (in_eof) |
| 2041 | return -1; |
| 2042 | len -= 4; |
| 2043 | if (size > len) |
| 2044 | break; |
| 2045 | len -= size; |
| 2046 | if (id == 0x0001) { |
| 2047 | /* Zip64 Extended Information Extra Field */ |
| 2048 | if (zip_ulen == LOW32 && size >= 8) { |
| 2049 | zip_ulen = GET4(); |
| 2050 | SKIP(4); |
| 2051 | size -= 8; |
| 2052 | } |
| 2053 | if (zip_clen == LOW32 && size >= 8) { |
| 2054 | zip_clen = GET4(); |
| 2055 | SKIP(4); |
| 2056 | size -= 8; |
| 2057 | } |
| 2058 | } |
| 2059 | if (save) { |
| 2060 | if ((id == 0x000d || id == 0x5855) && size >= 8) { |
| 2061 | /* PKWare Unix or Info-ZIP Type 1 Unix block */ |
| 2062 | SKIP(4); |
| 2063 | stamp = tolong(GET4()); |
| 2064 | size -= 8; |
| 2065 | } |
| 2066 | if (id == 0x5455 && size >= 5) { |
| 2067 | /* Extended Timestamp block */ |
| 2068 | size--; |
| 2069 | if (GET() & 1) { |
| 2070 | stamp = tolong(GET4()); |
| 2071 | size -= 4; |
| 2072 | } |
| 2073 | } |
| 2074 | } |
| 2075 | SKIP(size); |
| 2076 | } |
| 2077 | SKIP(len); |
| 2078 | return 0; |
| 2079 | } |
| 2080 | |
| 2081 | /* read a gzip, zip, zlib, or lzw header from ind and extract useful |
| 2082 | information, return the method -- or on error return negative: -1 is |
| 2083 | immediate EOF, -2 is not a recognized compressed format, -3 is premature EOF |
| 2084 | within the header, -4 is unexpected header flag values; a method of 256 is |
| 2085 | lzw -- set form to indicate gzip, zlib, or zip */ |
| 2086 | local int get_header(int save) |
| 2087 | { |
| 2088 | unsigned magic; /* magic header */ |
| 2089 | int method; /* compression method */ |
| 2090 | int flags; /* header flags */ |
| 2091 | unsigned fname, extra; /* name and extra field lengths */ |
| 2092 | unsigned tmp2; /* for macro */ |
| 2093 | unsigned long tmp4; /* for macro */ |
| 2094 | |
| 2095 | /* clear return information */ |
| 2096 | if (save) { |
| 2097 | stamp = 0; |
| 2098 | RELEASE(hname); |
| 2099 | } |
| 2100 | |
| 2101 | /* see if it's a gzip, zlib, or lzw file */ |
| 2102 | form = 0; |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2103 | magic1 = GET(); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2104 | if (in_eof) |
| 2105 | return -1; |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2106 | magic = magic1 << 8; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2107 | magic += GET(); |
| 2108 | if (in_eof) |
| 2109 | return -2; |
| 2110 | if (magic % 31 == 0) { /* it's zlib */ |
| 2111 | form = 1; |
| 2112 | return (int)((magic >> 8) & 0xf); |
| 2113 | } |
| 2114 | if (magic == 0x1f9d) /* it's lzw */ |
| 2115 | return 256; |
| 2116 | if (magic == 0x504b) { /* it's zip */ |
| 2117 | if (GET() != 3 || GET() != 4) |
| 2118 | return -3; |
| 2119 | SKIP(2); |
| 2120 | flags = GET2(); |
| 2121 | if (in_eof) |
| 2122 | return -3; |
| 2123 | if (flags & 0xfff0) |
| 2124 | return -4; |
| 2125 | method = GET2(); |
| 2126 | if (flags & 1) /* encrypted */ |
| 2127 | method = 255; /* mark as unknown method */ |
| 2128 | if (in_eof) |
| 2129 | return -3; |
| 2130 | if (save) |
| 2131 | stamp = dos2time(GET4()); |
| 2132 | else |
| 2133 | SKIP(4); |
| 2134 | zip_crc = GET4(); |
| 2135 | zip_clen = GET4(); |
| 2136 | zip_ulen = GET4(); |
| 2137 | fname = GET2(); |
| 2138 | extra = GET2(); |
| 2139 | if (save) { |
| 2140 | char *next = hname = malloc(fname + 1); |
| 2141 | if (hname == NULL) |
| 2142 | bail("not enough memory", ""); |
| 2143 | while (fname > in_left) { |
| 2144 | memcpy(next, in_next, in_left); |
| 2145 | fname -= in_left; |
| 2146 | next += in_left; |
| 2147 | if (load() == 0) |
| 2148 | return -3; |
| 2149 | } |
| 2150 | memcpy(next, in_next, fname); |
| 2151 | in_left -= fname; |
| 2152 | in_next += fname; |
| 2153 | next += fname; |
| 2154 | *next = 0; |
| 2155 | } |
| 2156 | else |
| 2157 | SKIP(fname); |
| 2158 | read_extra(extra, save); |
| 2159 | form = 2 + ((flags & 8) >> 3); |
| 2160 | return in_eof ? -3 : method; |
| 2161 | } |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2162 | if (magic != 0x1f8b) { /* not gzip */ |
| 2163 | in_left++; /* unget second magic byte */ |
| 2164 | in_next--; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2165 | return -2; |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2166 | } |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2167 | |
| 2168 | /* it's gzip -- get method and flags */ |
| 2169 | method = GET(); |
| 2170 | flags = GET(); |
| 2171 | if (in_eof) |
| 2172 | return -1; |
| 2173 | if (flags & 0xe0) |
| 2174 | return -4; |
| 2175 | |
| 2176 | /* get time stamp */ |
| 2177 | if (save) |
| 2178 | stamp = tolong(GET4()); |
| 2179 | else |
| 2180 | SKIP(4); |
| 2181 | |
| 2182 | /* skip extra field and OS */ |
| 2183 | SKIP(2); |
| 2184 | |
| 2185 | /* skip extra field, if present */ |
| 2186 | if (flags & 4) { |
| 2187 | extra = GET2(); |
| 2188 | if (in_eof) |
| 2189 | return -3; |
| 2190 | SKIP(extra); |
| 2191 | } |
| 2192 | |
| 2193 | /* read file name, if present, into allocated memory */ |
| 2194 | if ((flags & 8) && save) { |
| 2195 | unsigned char *end; |
| 2196 | size_t copy, have, size = 128; |
| 2197 | hname = malloc(size); |
| 2198 | if (hname == NULL) |
| 2199 | bail("not enough memory", ""); |
| 2200 | have = 0; |
| 2201 | do { |
| 2202 | if (in_left == 0 && load() == 0) |
| 2203 | return -3; |
| 2204 | end = memchr(in_next, 0, in_left); |
| 2205 | copy = end == NULL ? in_left : (size_t)(end - in_next) + 1; |
| 2206 | if (have + copy > size) { |
| 2207 | while (have + copy > (size <<= 1)) |
| 2208 | ; |
| 2209 | hname = realloc(hname, size); |
| 2210 | if (hname == NULL) |
| 2211 | bail("not enough memory", ""); |
| 2212 | } |
| 2213 | memcpy(hname + have, in_next, copy); |
| 2214 | have += copy; |
| 2215 | in_left -= copy; |
| 2216 | in_next += copy; |
| 2217 | } while (end == NULL); |
| 2218 | } |
| 2219 | else if (flags & 8) |
| 2220 | while (GET() != 0) |
| 2221 | if (in_eof) |
| 2222 | return -3; |
| 2223 | |
| 2224 | /* skip comment */ |
| 2225 | if (flags & 16) |
| 2226 | while (GET() != 0) |
| 2227 | if (in_eof) |
| 2228 | return -3; |
| 2229 | |
| 2230 | /* skip header crc */ |
| 2231 | if (flags & 2) |
| 2232 | SKIP(2); |
| 2233 | |
| 2234 | /* return compression method */ |
| 2235 | return method; |
| 2236 | } |
| 2237 | |
| 2238 | /* --- list contents of compressed input (gzip, zlib, or lzw) */ |
| 2239 | |
| 2240 | /* find standard compressed file suffix, return length of suffix */ |
| 2241 | local size_t compressed_suffix(char *nm) |
| 2242 | { |
| 2243 | size_t len; |
| 2244 | |
| 2245 | len = strlen(nm); |
| 2246 | if (len > 4) { |
| 2247 | nm += len - 4; |
| 2248 | len = 4; |
| 2249 | if (strcmp(nm, ".zip") == 0 || strcmp(nm, ".ZIP") == 0 || |
| 2250 | strcmp(nm, ".tgz") == 0) |
| 2251 | return 4; |
| 2252 | } |
| 2253 | if (len > 3) { |
| 2254 | nm += len - 3; |
| 2255 | len = 3; |
| 2256 | if (strcmp(nm, ".gz") == 0 || strcmp(nm, "-gz") == 0 || |
| 2257 | strcmp(nm, ".zz") == 0 || strcmp(nm, "-zz") == 0) |
| 2258 | return 3; |
| 2259 | } |
| 2260 | if (len > 2) { |
| 2261 | nm += len - 2; |
| 2262 | if (strcmp(nm, ".z") == 0 || strcmp(nm, "-z") == 0 || |
| 2263 | strcmp(nm, "_z") == 0 || strcmp(nm, ".Z") == 0) |
| 2264 | return 2; |
| 2265 | } |
| 2266 | return 0; |
| 2267 | } |
| 2268 | |
| 2269 | /* listing file name lengths for -l and -lv */ |
| 2270 | #define NAMEMAX1 48 /* name display limit at verbosity 1 */ |
| 2271 | #define NAMEMAX2 16 /* name display limit at verbosity 2 */ |
| 2272 | |
| 2273 | /* print gzip or lzw file information */ |
| 2274 | local void show_info(int method, unsigned long check, off_t len, int cont) |
| 2275 | { |
| 2276 | size_t max; /* maximum name length for current verbosity */ |
| 2277 | size_t n; /* name length without suffix */ |
| 2278 | time_t now; /* for getting current year */ |
| 2279 | char mod[26]; /* modification time in text */ |
| 2280 | char name[NAMEMAX1+1]; /* header or file name, possibly truncated */ |
| 2281 | |
| 2282 | /* create abbreviated name from header file name or actual file name */ |
| 2283 | max = verbosity > 1 ? NAMEMAX2 : NAMEMAX1; |
| 2284 | memset(name, 0, max + 1); |
| 2285 | if (cont) |
| 2286 | strncpy(name, "<...>", max + 1); |
| 2287 | else if (hname == NULL) { |
| 2288 | n = strlen(in) - compressed_suffix(in); |
| 2289 | strncpy(name, in, n > max + 1 ? max + 1 : n); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2290 | if (strcmp(in + n, ".tgz") == 0 && n < max + 1) |
| 2291 | strncpy(name + n, ".tar", max + 1 - n); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2292 | } |
| 2293 | else |
| 2294 | strncpy(name, hname, max + 1); |
| 2295 | if (name[max]) |
| 2296 | strcpy(name + max - 3, "..."); |
| 2297 | |
| 2298 | /* convert time stamp to text */ |
| 2299 | if (stamp) { |
| 2300 | strcpy(mod, ctime(&stamp)); |
| 2301 | now = time(NULL); |
| 2302 | if (strcmp(mod + 20, ctime(&now) + 20) != 0) |
| 2303 | strcpy(mod + 11, mod + 19); |
| 2304 | } |
| 2305 | else |
| 2306 | strcpy(mod + 4, "------ -----"); |
| 2307 | mod[16] = 0; |
| 2308 | |
| 2309 | /* if first time, print header */ |
| 2310 | if (first) { |
| 2311 | if (verbosity > 1) |
| 2312 | fputs("method check timestamp ", stdout); |
| 2313 | if (verbosity > 0) |
| 2314 | puts("compressed original reduced name"); |
| 2315 | first = 0; |
| 2316 | } |
| 2317 | |
| 2318 | /* print information */ |
| 2319 | if (verbosity > 1) { |
| 2320 | if (form == 3 && !decode) |
| 2321 | printf("zip%3d -------- %s ", method, mod + 4); |
| 2322 | else if (form > 1) |
| 2323 | printf("zip%3d %08lx %s ", method, check, mod + 4); |
| 2324 | else if (form) |
| 2325 | printf("zlib%2d %08lx %s ", method, check, mod + 4); |
| 2326 | else if (method == 256) |
| 2327 | printf("lzw -------- %s ", mod + 4); |
| 2328 | else |
| 2329 | printf("gzip%2d %08lx %s ", method, check, mod + 4); |
| 2330 | } |
| 2331 | if (verbosity > 0) { |
| 2332 | if ((form == 3 && !decode) || |
| 2333 | (method == 8 && in_tot > (len + (len >> 10) + 12)) || |
| 2334 | (method == 256 && in_tot > len + (len >> 1) + 3)) |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2335 | #if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3 |
| 2336 | printf("%10jd %10jd? unk %s\n", |
| 2337 | (intmax_t)in_tot, (intmax_t)len, name); |
| 2338 | else |
| 2339 | printf("%10jd %10jd %6.1f%% %s\n", |
| 2340 | (intmax_t)in_tot, (intmax_t)len, |
| 2341 | len == 0 ? 0 : 100 * (len - in_tot)/(double)len, |
| 2342 | name); |
| 2343 | #else |
| 2344 | printf(sizeof(off_t) == sizeof(long) ? |
| 2345 | "%10ld %10ld? unk %s\n" : "%10lld %10lld? unk %s\n", |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2346 | in_tot, len, name); |
| 2347 | else |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2348 | printf(sizeof(off_t) == sizeof(long) ? |
| 2349 | "%10ld %10ld %6.1f%% %s\n" : "%10lld %10lld %6.1f%% %s\n", |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2350 | in_tot, len, |
| 2351 | len == 0 ? 0 : 100 * (len - in_tot)/(double)len, |
| 2352 | name); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2353 | #endif |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2354 | } |
| 2355 | } |
| 2356 | |
| 2357 | /* list content information about the gzip file at ind (only works if the gzip |
| 2358 | file contains a single gzip stream with no junk at the end, and only works |
| 2359 | well if the uncompressed length is less than 4 GB) */ |
| 2360 | local void list_info(void) |
| 2361 | { |
| 2362 | int method; /* get_header() return value */ |
| 2363 | size_t n; /* available trailer bytes */ |
| 2364 | off_t at; /* used to calculate compressed length */ |
| 2365 | unsigned char tail[8]; /* trailer containing check and length */ |
| 2366 | unsigned long check, len; /* check value and length from trailer */ |
| 2367 | |
| 2368 | /* initialize input buffer */ |
| 2369 | in_init(); |
| 2370 | |
| 2371 | /* read header information and position input after header */ |
| 2372 | method = get_header(1); |
| 2373 | if (method < 0) { |
| 2374 | RELEASE(hname); |
| 2375 | if (method != -1 && verbosity > 1) |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2376 | complain("%s not a compressed file -- skipping", in); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2377 | return; |
| 2378 | } |
| 2379 | |
| 2380 | /* list zip file */ |
| 2381 | if (form > 1) { |
| 2382 | in_tot = zip_clen; |
| 2383 | show_info(method, zip_crc, zip_ulen, 0); |
| 2384 | return; |
| 2385 | } |
| 2386 | |
| 2387 | /* list zlib file */ |
| 2388 | if (form) { |
| 2389 | at = lseek(ind, 0, SEEK_END); |
| 2390 | if (at == -1) { |
| 2391 | check = 0; |
| 2392 | do { |
| 2393 | len = in_left < 4 ? in_left : 4; |
| 2394 | in_next += in_left - len; |
| 2395 | while (len--) |
| 2396 | check = (check << 8) + *in_next++; |
| 2397 | } while (load() != 0); |
| 2398 | check &= LOW32; |
| 2399 | } |
| 2400 | else { |
| 2401 | in_tot = at; |
| 2402 | lseek(ind, -4, SEEK_END); |
| 2403 | readn(ind, tail, 4); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2404 | check = PULL4M(tail); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2405 | } |
| 2406 | in_tot -= 6; |
| 2407 | show_info(method, check, 0, 0); |
| 2408 | return; |
| 2409 | } |
| 2410 | |
| 2411 | /* list lzw file */ |
| 2412 | if (method == 256) { |
| 2413 | at = lseek(ind, 0, SEEK_END); |
| 2414 | if (at == -1) |
| 2415 | while (load() != 0) |
| 2416 | ; |
| 2417 | else |
| 2418 | in_tot = at; |
| 2419 | in_tot -= 3; |
| 2420 | show_info(method, 0, 0, 0); |
| 2421 | return; |
| 2422 | } |
| 2423 | |
| 2424 | /* skip to end to get trailer (8 bytes), compute compressed length */ |
| 2425 | if (in_short) { /* whole thing already read */ |
| 2426 | if (in_left < 8) { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2427 | complain("%s not a valid gzip file -- skipping", in); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2428 | return; |
| 2429 | } |
| 2430 | in_tot = in_left - 8; /* compressed size */ |
| 2431 | memcpy(tail, in_next + (in_left - 8), 8); |
| 2432 | } |
| 2433 | else if ((at = lseek(ind, -8, SEEK_END)) != -1) { |
| 2434 | in_tot = at - in_tot + in_left; /* compressed size */ |
| 2435 | readn(ind, tail, 8); /* get trailer */ |
| 2436 | } |
| 2437 | else { /* can't seek */ |
| 2438 | at = in_tot - in_left; /* save header size */ |
| 2439 | do { |
| 2440 | n = in_left < 8 ? in_left : 8; |
| 2441 | memcpy(tail, in_next + (in_left - n), n); |
| 2442 | load(); |
| 2443 | } while (in_left == BUF); /* read until end */ |
| 2444 | if (in_left < 8) { |
| 2445 | if (n + in_left < 8) { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2446 | complain("%s not a valid gzip file -- skipping", in); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2447 | return; |
| 2448 | } |
| 2449 | if (in_left) { |
| 2450 | if (n + in_left > 8) |
| 2451 | memcpy(tail, tail + n - (8 - in_left), 8 - in_left); |
| 2452 | memcpy(tail + 8 - in_left, in_next, in_left); |
| 2453 | } |
| 2454 | } |
| 2455 | else |
| 2456 | memcpy(tail, in_next + (in_left - 8), 8); |
| 2457 | in_tot -= at + 8; |
| 2458 | } |
| 2459 | if (in_tot < 2) { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2460 | complain("%s not a valid gzip file -- skipping", in); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2461 | return; |
| 2462 | } |
| 2463 | |
| 2464 | /* convert trailer to check and uncompressed length (modulo 2^32) */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2465 | check = PULL4L(tail); |
| 2466 | len = PULL4L(tail + 4); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2467 | |
| 2468 | /* list information about contents */ |
| 2469 | show_info(method, check, len, 0); |
| 2470 | RELEASE(hname); |
| 2471 | } |
| 2472 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2473 | /* --- copy input to output (when acting like cat) --- */ |
| 2474 | |
| 2475 | local void cat(void) |
| 2476 | { |
| 2477 | /* write first magic byte (if we're here, there's at least one byte) */ |
| 2478 | writen(outd, &magic1, 1); |
| 2479 | out_tot = 1; |
| 2480 | |
| 2481 | /* copy the remainder of the input to the output (if there were any more |
| 2482 | bytes of input, then in_left is non-zero and in_next is pointing to the |
| 2483 | second magic byte) */ |
| 2484 | while (in_left) { |
| 2485 | writen(outd, in_next, in_left); |
| 2486 | out_tot += in_left; |
| 2487 | in_left = 0; |
| 2488 | load(); |
| 2489 | } |
| 2490 | } |
| 2491 | |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2492 | /* --- decompress deflate input --- */ |
| 2493 | |
| 2494 | /* call-back input function for inflateBack() */ |
| 2495 | local unsigned inb(void *desc, unsigned char **buf) |
| 2496 | { |
| 2497 | (void)desc; |
| 2498 | load(); |
| 2499 | *buf = in_next; |
| 2500 | return in_left; |
| 2501 | } |
| 2502 | |
| 2503 | /* output buffers and window for infchk() and unlzw() */ |
| 2504 | #define OUTSIZE 32768U /* must be at least 32K for inflateBack() window */ |
| 2505 | local unsigned char out_buf[OUTSIZE]; |
| 2506 | |
| 2507 | #ifndef NOTHREAD |
| 2508 | /* output data for parallel write and check */ |
| 2509 | local unsigned char out_copy[OUTSIZE]; |
| 2510 | local size_t out_len; |
| 2511 | |
| 2512 | /* outb threads states */ |
| 2513 | local lock *outb_write_more = NULL; |
| 2514 | local lock *outb_check_more; |
| 2515 | |
| 2516 | /* output write thread */ |
| 2517 | local void outb_write(void *dummy) |
| 2518 | { |
| 2519 | size_t len; |
| 2520 | |
| 2521 | (void)dummy; |
| 2522 | |
| 2523 | Trace(("-- launched decompress write thread")); |
| 2524 | do { |
| 2525 | possess(outb_write_more); |
| 2526 | wait_for(outb_write_more, TO_BE, 1); |
| 2527 | len = out_len; |
| 2528 | if (len && decode == 1) |
| 2529 | writen(outd, out_copy, len); |
| 2530 | Trace(("-- decompress wrote %lu bytes", len)); |
| 2531 | twist(outb_write_more, TO, 0); |
| 2532 | } while (len); |
| 2533 | Trace(("-- exited decompress write thread")); |
| 2534 | } |
| 2535 | |
| 2536 | /* output check thread */ |
| 2537 | local void outb_check(void *dummy) |
| 2538 | { |
| 2539 | size_t len; |
| 2540 | |
| 2541 | (void)dummy; |
| 2542 | |
| 2543 | Trace(("-- launched decompress check thread")); |
| 2544 | do { |
| 2545 | possess(outb_check_more); |
| 2546 | wait_for(outb_check_more, TO_BE, 1); |
| 2547 | len = out_len; |
| 2548 | out_check = CHECK(out_check, out_copy, len); |
| 2549 | Trace(("-- decompress checked %lu bytes", len)); |
| 2550 | twist(outb_check_more, TO, 0); |
| 2551 | } while (len); |
| 2552 | Trace(("-- exited decompress check thread")); |
| 2553 | } |
| 2554 | #endif |
| 2555 | |
| 2556 | /* call-back output function for inflateBack() -- wait for the last write and |
| 2557 | check calculation to complete, copy the write buffer, and then alert the |
| 2558 | write and check threads and return for more decompression while that's |
| 2559 | going on (or just write and check if no threads or if proc == 1) */ |
| 2560 | local int outb(void *desc, unsigned char *buf, unsigned len) |
| 2561 | { |
| 2562 | #ifndef NOTHREAD |
| 2563 | static thread *wr, *ch; |
| 2564 | |
| 2565 | (void)desc; |
| 2566 | |
| 2567 | if (procs > 1) { |
| 2568 | /* if first time, initialize state and launch threads */ |
| 2569 | if (outb_write_more == NULL) { |
| 2570 | outb_write_more = new_lock(0); |
| 2571 | outb_check_more = new_lock(0); |
| 2572 | wr = launch(outb_write, NULL); |
| 2573 | ch = launch(outb_check, NULL); |
| 2574 | } |
| 2575 | |
| 2576 | /* wait for previous write and check threads to complete */ |
| 2577 | possess(outb_check_more); |
| 2578 | wait_for(outb_check_more, TO_BE, 0); |
| 2579 | possess(outb_write_more); |
| 2580 | wait_for(outb_write_more, TO_BE, 0); |
| 2581 | |
| 2582 | /* copy the output and alert the worker bees */ |
| 2583 | out_len = len; |
| 2584 | out_tot += len; |
| 2585 | memcpy(out_copy, buf, len); |
| 2586 | twist(outb_write_more, TO, 1); |
| 2587 | twist(outb_check_more, TO, 1); |
| 2588 | |
| 2589 | /* if requested with len == 0, clean up -- terminate and join write and |
| 2590 | check threads, free lock */ |
| 2591 | if (len == 0) { |
| 2592 | join(ch); |
| 2593 | join(wr); |
| 2594 | free_lock(outb_check_more); |
| 2595 | free_lock(outb_write_more); |
| 2596 | outb_write_more = NULL; |
| 2597 | } |
| 2598 | |
| 2599 | /* return for more decompression while last buffer is being written |
| 2600 | and having its check value calculated -- we wait for those to finish |
| 2601 | the next time this function is called */ |
| 2602 | return 0; |
| 2603 | } |
| 2604 | #endif |
| 2605 | |
| 2606 | /* if just one process or no threads, then do it without threads */ |
| 2607 | if (len) { |
| 2608 | if (decode == 1) |
| 2609 | writen(outd, buf, len); |
| 2610 | out_check = CHECK(out_check, buf, len); |
| 2611 | out_tot += len; |
| 2612 | } |
| 2613 | return 0; |
| 2614 | } |
| 2615 | |
| 2616 | /* inflate for decompression or testing -- decompress from ind to outd unless |
| 2617 | decode != 1, in which case just test ind, and then also list if list != 0; |
| 2618 | look for and decode multiple, concatenated gzip and/or zlib streams; |
| 2619 | read and check the gzip, zlib, or zip trailer */ |
| 2620 | local void infchk(void) |
| 2621 | { |
| 2622 | int ret, cont; |
| 2623 | unsigned long check, len; |
| 2624 | z_stream strm; |
| 2625 | unsigned tmp2; |
| 2626 | unsigned long tmp4; |
| 2627 | off_t clen; |
| 2628 | |
| 2629 | cont = 0; |
| 2630 | do { |
| 2631 | /* header already read -- set up for decompression */ |
| 2632 | in_tot = in_left; /* track compressed data length */ |
| 2633 | out_tot = 0; |
| 2634 | out_check = CHECK(0L, Z_NULL, 0); |
| 2635 | strm.zalloc = Z_NULL; |
| 2636 | strm.zfree = Z_NULL; |
| 2637 | strm.opaque = Z_NULL; |
| 2638 | ret = inflateBackInit(&strm, 15, out_buf); |
| 2639 | if (ret != Z_OK) |
| 2640 | bail("not enough memory", ""); |
| 2641 | |
| 2642 | /* decompress, compute lengths and check value */ |
| 2643 | strm.avail_in = in_left; |
| 2644 | strm.next_in = in_next; |
| 2645 | ret = inflateBack(&strm, inb, NULL, outb, NULL); |
| 2646 | if (ret != Z_STREAM_END) |
| 2647 | bail("corrupted input -- invalid deflate data: ", in); |
| 2648 | in_left = strm.avail_in; |
| 2649 | in_next = strm.next_in; |
| 2650 | inflateBackEnd(&strm); |
| 2651 | outb(NULL, NULL, 0); /* finish off final write and check */ |
| 2652 | |
| 2653 | /* compute compressed data length */ |
| 2654 | clen = in_tot - in_left; |
| 2655 | |
| 2656 | /* read and check trailer */ |
| 2657 | if (form > 1) { /* zip local trailer (if any) */ |
| 2658 | if (form == 3) { /* data descriptor follows */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2659 | /* read original version of data descriptor */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2660 | zip_crc = GET4(); |
| 2661 | zip_clen = GET4(); |
| 2662 | zip_ulen = GET4(); |
| 2663 | if (in_eof) |
| 2664 | bail("corrupted zip entry -- missing trailer: ", in); |
| 2665 | |
| 2666 | /* if crc doesn't match, try info-zip variant with sig */ |
| 2667 | if (zip_crc != out_check) { |
| 2668 | if (zip_crc != 0x08074b50UL || zip_clen != out_check) |
| 2669 | bail("corrupted zip entry -- crc32 mismatch: ", in); |
| 2670 | zip_crc = zip_clen; |
| 2671 | zip_clen = zip_ulen; |
| 2672 | zip_ulen = GET4(); |
| 2673 | } |
| 2674 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2675 | /* handle incredibly rare cases where crc equals signature */ |
| 2676 | else if (zip_crc == 0x08074b50UL && zip_clen == zip_crc && |
| 2677 | ((clen & LOW32) != zip_crc || zip_ulen == zip_crc)) { |
| 2678 | zip_crc = zip_clen; |
| 2679 | zip_clen = zip_ulen; |
| 2680 | zip_ulen = GET4(); |
| 2681 | } |
| 2682 | |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2683 | /* if second length doesn't match, try 64-bit lengths */ |
| 2684 | if (zip_ulen != (out_tot & LOW32)) { |
| 2685 | zip_ulen = GET4(); |
| 2686 | (void)GET4(); |
| 2687 | } |
| 2688 | if (in_eof) |
| 2689 | bail("corrupted zip entry -- missing trailer: ", in); |
| 2690 | } |
| 2691 | if (zip_clen != (clen & LOW32) || zip_ulen != (out_tot & LOW32)) |
| 2692 | bail("corrupted zip entry -- length mismatch: ", in); |
| 2693 | check = zip_crc; |
| 2694 | } |
| 2695 | else if (form == 1) { /* zlib (big-endian) trailer */ |
| 2696 | check = (unsigned long)(GET()) << 24; |
| 2697 | check += (unsigned long)(GET()) << 16; |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2698 | check += (unsigned)(GET()) << 8; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2699 | check += GET(); |
| 2700 | if (in_eof) |
| 2701 | bail("corrupted zlib stream -- missing trailer: ", in); |
| 2702 | if (check != out_check) |
| 2703 | bail("corrupted zlib stream -- adler32 mismatch: ", in); |
| 2704 | } |
| 2705 | else { /* gzip trailer */ |
| 2706 | check = GET4(); |
| 2707 | len = GET4(); |
| 2708 | if (in_eof) |
| 2709 | bail("corrupted gzip stream -- missing trailer: ", in); |
| 2710 | if (check != out_check) |
| 2711 | bail("corrupted gzip stream -- crc32 mismatch: ", in); |
| 2712 | if (len != (out_tot & LOW32)) |
| 2713 | bail("corrupted gzip stream -- length mismatch: ", in); |
| 2714 | } |
| 2715 | |
| 2716 | /* show file information if requested */ |
| 2717 | if (list) { |
| 2718 | in_tot = clen; |
| 2719 | show_info(8, check, out_tot, cont); |
| 2720 | cont = 1; |
| 2721 | } |
| 2722 | |
| 2723 | /* if a gzip or zlib entry follows a gzip or zlib entry, decompress it |
| 2724 | (don't replace saved header information from first entry) */ |
| 2725 | } while (form < 2 && (ret = get_header(0)) == 8 && form < 2); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2726 | |
| 2727 | /* gzip -cdf copies junk after gzip stream directly to output */ |
| 2728 | if (form < 2 && ret == -2 && force && pipeout && decode != 2 && !list) |
| 2729 | cat(); |
| 2730 | else if (ret != -1 && form < 2) |
| 2731 | complain("%s OK, has trailing junk which was ignored", in); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2732 | } |
| 2733 | |
| 2734 | /* --- decompress Unix compress (LZW) input --- */ |
| 2735 | |
| 2736 | /* memory for unlzw() -- |
| 2737 | the first 256 entries of prefix[] and suffix[] are never used, could |
| 2738 | have offset the index, but it's faster to waste the memory */ |
| 2739 | unsigned short prefix[65536]; /* index to LZW prefix string */ |
| 2740 | unsigned char suffix[65536]; /* one-character LZW suffix */ |
| 2741 | unsigned char match[65280 + 2]; /* buffer for reversed match */ |
| 2742 | |
| 2743 | /* throw out what's left in the current bits byte buffer (this is a vestigial |
| 2744 | aspect of the compressed data format derived from an implementation that |
| 2745 | made use of a special VAX machine instruction!) */ |
| 2746 | #define FLUSHCODE() \ |
| 2747 | do { \ |
| 2748 | left = 0; \ |
| 2749 | rem = 0; \ |
| 2750 | if (chunk > in_left) { \ |
| 2751 | chunk -= in_left; \ |
| 2752 | if (load() == 0) \ |
| 2753 | break; \ |
| 2754 | if (chunk > in_left) { \ |
| 2755 | chunk = in_left = 0; \ |
| 2756 | break; \ |
| 2757 | } \ |
| 2758 | } \ |
| 2759 | in_left -= chunk; \ |
| 2760 | in_next += chunk; \ |
| 2761 | chunk = 0; \ |
| 2762 | } while (0) |
| 2763 | |
| 2764 | /* Decompress a compress (LZW) file from ind to outd. The compress magic |
| 2765 | header (two bytes) has already been read and verified. */ |
| 2766 | local void unlzw(void) |
| 2767 | { |
| 2768 | int got; /* byte just read by GET() */ |
| 2769 | unsigned chunk; /* bytes left in current chunk */ |
| 2770 | int left; /* bits left in rem */ |
| 2771 | unsigned rem; /* unused bits from input */ |
| 2772 | int bits; /* current bits per code */ |
| 2773 | unsigned code; /* code, table traversal index */ |
| 2774 | unsigned mask; /* mask for current bits codes */ |
| 2775 | int max; /* maximum bits per code for this stream */ |
| 2776 | int flags; /* compress flags, then block compress flag */ |
| 2777 | unsigned end; /* last valid entry in prefix/suffix tables */ |
| 2778 | unsigned temp; /* current code */ |
| 2779 | unsigned prev; /* previous code */ |
| 2780 | unsigned final; /* last character written for previous code */ |
| 2781 | unsigned stack; /* next position for reversed string */ |
| 2782 | unsigned outcnt; /* bytes in output buffer */ |
| 2783 | unsigned char *p; |
| 2784 | |
| 2785 | /* process remainder of compress header -- a flags byte */ |
| 2786 | out_tot = 0; |
| 2787 | flags = GET(); |
| 2788 | if (in_eof) |
| 2789 | bail("missing lzw data: ", in); |
| 2790 | if (flags & 0x60) |
| 2791 | bail("unknown lzw flags set: ", in); |
| 2792 | max = flags & 0x1f; |
| 2793 | if (max < 9 || max > 16) |
| 2794 | bail("lzw bits out of range: ", in); |
| 2795 | if (max == 9) /* 9 doesn't really mean 9 */ |
| 2796 | max = 10; |
| 2797 | flags &= 0x80; /* true if block compress */ |
| 2798 | |
| 2799 | /* clear table */ |
| 2800 | bits = 9; |
| 2801 | mask = 0x1ff; |
| 2802 | end = flags ? 256 : 255; |
| 2803 | |
| 2804 | /* set up: get first 9-bit code, which is the first decompressed byte, but |
| 2805 | don't create a table entry until the next code */ |
| 2806 | got = GET(); |
| 2807 | if (in_eof) /* no compressed data is ok */ |
| 2808 | return; |
| 2809 | final = prev = (unsigned)got; /* low 8 bits of code */ |
| 2810 | got = GET(); |
| 2811 | if (in_eof || (got & 1) != 0) /* missing a bit or code >= 256 */ |
| 2812 | bail("invalid lzw code: ", in); |
| 2813 | rem = (unsigned)got >> 1; /* remaining 7 bits */ |
| 2814 | left = 7; |
| 2815 | chunk = bits - 2; /* 7 bytes left in this chunk */ |
| 2816 | out_buf[0] = (unsigned char)final; /* write first decompressed byte */ |
| 2817 | outcnt = 1; |
| 2818 | |
| 2819 | /* decode codes */ |
| 2820 | stack = 0; |
| 2821 | for (;;) { |
| 2822 | /* if the table will be full after this, increment the code size */ |
| 2823 | if (end >= mask && bits < max) { |
| 2824 | FLUSHCODE(); |
| 2825 | bits++; |
| 2826 | mask <<= 1; |
| 2827 | mask++; |
| 2828 | } |
| 2829 | |
| 2830 | /* get a code of length bits */ |
| 2831 | if (chunk == 0) /* decrement chunk modulo bits */ |
| 2832 | chunk = bits; |
| 2833 | code = rem; /* low bits of code */ |
| 2834 | got = GET(); |
| 2835 | if (in_eof) { /* EOF is end of compressed data */ |
| 2836 | /* write remaining buffered output */ |
| 2837 | out_tot += outcnt; |
| 2838 | if (outcnt && decode == 1) |
| 2839 | writen(outd, out_buf, outcnt); |
| 2840 | return; |
| 2841 | } |
| 2842 | code += (unsigned)got << left; /* middle (or high) bits of code */ |
| 2843 | left += 8; |
| 2844 | chunk--; |
| 2845 | if (bits > left) { /* need more bits */ |
| 2846 | got = GET(); |
| 2847 | if (in_eof) /* can't end in middle of code */ |
| 2848 | bail("invalid lzw code: ", in); |
| 2849 | code += (unsigned)got << left; /* high bits of code */ |
| 2850 | left += 8; |
| 2851 | chunk--; |
| 2852 | } |
| 2853 | code &= mask; /* mask to current code length */ |
| 2854 | left -= bits; /* number of unused bits */ |
| 2855 | rem = (unsigned)got >> (8 - left); /* unused bits from last byte */ |
| 2856 | |
| 2857 | /* process clear code (256) */ |
| 2858 | if (code == 256 && flags) { |
| 2859 | FLUSHCODE(); |
| 2860 | bits = 9; /* initialize bits and mask */ |
| 2861 | mask = 0x1ff; |
| 2862 | end = 255; /* empty table */ |
| 2863 | continue; /* get next code */ |
| 2864 | } |
| 2865 | |
| 2866 | /* special code to reuse last match */ |
| 2867 | temp = code; /* save the current code */ |
| 2868 | if (code > end) { |
| 2869 | /* Be picky on the allowed code here, and make sure that the code |
| 2870 | we drop through (prev) will be a valid index so that random |
| 2871 | input does not cause an exception. The code != end + 1 check is |
| 2872 | empirically derived, and not checked in the original uncompress |
| 2873 | code. If this ever causes a problem, that check could be safely |
| 2874 | removed. Leaving this check in greatly improves pigz's ability |
| 2875 | to detect random or corrupted input after a compress header. |
| 2876 | In any case, the prev > end check must be retained. */ |
| 2877 | if (code != end + 1 || prev > end) |
| 2878 | bail("invalid lzw code: ", in); |
| 2879 | match[stack++] = (unsigned char)final; |
| 2880 | code = prev; |
| 2881 | } |
| 2882 | |
| 2883 | /* walk through linked list to generate output in reverse order */ |
| 2884 | p = match + stack; |
| 2885 | while (code >= 256) { |
| 2886 | *p++ = suffix[code]; |
| 2887 | code = prefix[code]; |
| 2888 | } |
| 2889 | stack = p - match; |
| 2890 | match[stack++] = (unsigned char)code; |
| 2891 | final = code; |
| 2892 | |
| 2893 | /* link new table entry */ |
| 2894 | if (end < mask) { |
| 2895 | end++; |
| 2896 | prefix[end] = (unsigned short)prev; |
| 2897 | suffix[end] = (unsigned char)final; |
| 2898 | } |
| 2899 | |
| 2900 | /* set previous code for next iteration */ |
| 2901 | prev = temp; |
| 2902 | |
| 2903 | /* write output in forward order */ |
| 2904 | while (stack > OUTSIZE - outcnt) { |
| 2905 | while (outcnt < OUTSIZE) |
| 2906 | out_buf[outcnt++] = match[--stack]; |
| 2907 | out_tot += outcnt; |
| 2908 | if (decode == 1) |
| 2909 | writen(outd, out_buf, outcnt); |
| 2910 | outcnt = 0; |
| 2911 | } |
| 2912 | p = match + stack; |
| 2913 | do { |
| 2914 | out_buf[outcnt++] = *--p; |
| 2915 | } while (p > match); |
| 2916 | stack = 0; |
| 2917 | |
| 2918 | /* loop for next code with final and prev as the last match, rem and |
| 2919 | left provide the first 0..7 bits of the next code, end is the last |
| 2920 | valid table entry */ |
| 2921 | } |
| 2922 | } |
| 2923 | |
| 2924 | /* --- file processing --- */ |
| 2925 | |
| 2926 | /* extract file name from path */ |
| 2927 | local char *justname(char *path) |
| 2928 | { |
| 2929 | char *p; |
| 2930 | |
| 2931 | p = path + strlen(path); |
| 2932 | while (--p >= path) |
| 2933 | if (*p == '/') |
| 2934 | break; |
| 2935 | return p + 1; |
| 2936 | } |
| 2937 | |
| 2938 | /* Copy file attributes, from -> to, as best we can. This is best effort, so |
| 2939 | no errors are reported. The mode bits, including suid, sgid, and the sticky |
| 2940 | bit are copied (if allowed), the owner's user id and group id are copied |
| 2941 | (again if allowed), and the access and modify times are copied. */ |
| 2942 | local void copymeta(char *from, char *to) |
| 2943 | { |
| 2944 | struct stat st; |
| 2945 | struct timeval times[2]; |
| 2946 | |
| 2947 | /* get all of from's Unix meta data, return if not a regular file */ |
| 2948 | if (stat(from, &st) != 0 || (st.st_mode & S_IFMT) != S_IFREG) |
| 2949 | return; |
| 2950 | |
| 2951 | /* set to's mode bits, ignore errors */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2952 | (void)chmod(to, st.st_mode & 07777); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2953 | |
| 2954 | /* copy owner's user and group, ignore errors */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2955 | (void)chown(to, st.st_uid, st.st_gid); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2956 | |
| 2957 | /* copy access and modify times, ignore errors */ |
| 2958 | times[0].tv_sec = st.st_atime; |
| 2959 | times[0].tv_usec = 0; |
| 2960 | times[1].tv_sec = st.st_mtime; |
| 2961 | times[1].tv_usec = 0; |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2962 | (void)utimes(to, times); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2963 | } |
| 2964 | |
| 2965 | /* set the access and modify times of fd to t */ |
| 2966 | local void touch(char *path, time_t t) |
| 2967 | { |
| 2968 | struct timeval times[2]; |
| 2969 | |
| 2970 | times[0].tv_sec = t; |
| 2971 | times[0].tv_usec = 0; |
| 2972 | times[1].tv_sec = t; |
| 2973 | times[1].tv_usec = 0; |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2974 | (void)utimes(path, times); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2975 | } |
| 2976 | |
| 2977 | /* process provided input file, or stdin if path is NULL -- process() can |
| 2978 | call itself for recursive directory processing */ |
| 2979 | local void process(char *path) |
| 2980 | { |
| 2981 | int method = -1; /* get_header() return value */ |
| 2982 | size_t len; /* length of base name (minus suffix) */ |
| 2983 | struct stat st; /* to get file type and mod time */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 2984 | /* all compressed suffixes for decoding search, in length order */ |
| 2985 | static char *sufs[] = {".z", "-z", "_z", ".Z", ".gz", "-gz", ".zz", "-zz", |
| 2986 | ".zip", ".ZIP", ".tgz", NULL}; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 2987 | |
| 2988 | /* open input file with name in, descriptor ind -- set name and mtime */ |
| 2989 | if (path == NULL) { |
| 2990 | strcpy(in, "<stdin>"); |
| 2991 | ind = 0; |
| 2992 | name = NULL; |
| 2993 | mtime = headis & 2 ? |
| 2994 | (fstat(ind, &st) ? time(NULL) : st.st_mtime) : 0; |
| 2995 | len = 0; |
| 2996 | } |
| 2997 | else { |
| 2998 | /* set input file name (already set if recursed here) */ |
| 2999 | if (path != in) { |
| 3000 | strncpy(in, path, sizeof(in)); |
| 3001 | if (in[sizeof(in) - 1]) |
| 3002 | bail("name too long: ", path); |
| 3003 | } |
| 3004 | len = strlen(in); |
| 3005 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3006 | /* try to stat input file -- if not there and decoding, look for that |
| 3007 | name with compressed suffixes */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3008 | if (lstat(in, &st)) { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3009 | if (errno == ENOENT && (list || decode)) { |
| 3010 | char **try = sufs; |
| 3011 | do { |
| 3012 | if (*try == NULL || len + strlen(*try) >= sizeof(in)) |
| 3013 | break; |
| 3014 | strcpy(in + len, *try++); |
| 3015 | errno = 0; |
| 3016 | } while (lstat(in, &st) && errno == ENOENT); |
| 3017 | } |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3018 | #ifdef EOVERFLOW |
| 3019 | if (errno == EOVERFLOW || errno == EFBIG) |
| 3020 | bail(in, |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3021 | " too large -- not compiled with large file support"); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3022 | #endif |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3023 | if (errno) { |
| 3024 | in[len] = 0; |
| 3025 | complain("%s does not exist -- skipping", in); |
| 3026 | return; |
| 3027 | } |
| 3028 | len = strlen(in); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3029 | } |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3030 | |
| 3031 | /* only process regular files, but allow symbolic links if -f, |
| 3032 | recurse into directory if -r */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3033 | if ((st.st_mode & S_IFMT) != S_IFREG && |
| 3034 | (st.st_mode & S_IFMT) != S_IFLNK && |
| 3035 | (st.st_mode & S_IFMT) != S_IFDIR) { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3036 | complain("%s is a special file or device -- skipping", in); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3037 | return; |
| 3038 | } |
| 3039 | if ((st.st_mode & S_IFMT) == S_IFLNK && !force && !pipeout) { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3040 | complain("%s is a symbolic link -- skipping", in); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3041 | return; |
| 3042 | } |
| 3043 | if ((st.st_mode & S_IFMT) == S_IFDIR && !recurse) { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3044 | complain("%s is a directory -- skipping", in); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3045 | return; |
| 3046 | } |
| 3047 | |
| 3048 | /* recurse into directory (assumes Unix) */ |
| 3049 | if ((st.st_mode & S_IFMT) == S_IFDIR) { |
| 3050 | char *roll, *item, *cut, *base, *bigger; |
| 3051 | size_t len, hold; |
| 3052 | DIR *here; |
| 3053 | struct dirent *next; |
| 3054 | |
| 3055 | /* accumulate list of entries (need to do this, since readdir() |
| 3056 | behavior not defined if directory modified between calls) */ |
| 3057 | here = opendir(in); |
| 3058 | if (here == NULL) |
| 3059 | return; |
| 3060 | hold = 512; |
| 3061 | roll = malloc(hold); |
| 3062 | if (roll == NULL) |
| 3063 | bail("not enough memory", ""); |
| 3064 | *roll = 0; |
| 3065 | item = roll; |
| 3066 | while ((next = readdir(here)) != NULL) { |
| 3067 | if (next->d_name[0] == 0 || |
| 3068 | (next->d_name[0] == '.' && (next->d_name[1] == 0 || |
| 3069 | (next->d_name[1] == '.' && next->d_name[2] == 0)))) |
| 3070 | continue; |
| 3071 | len = strlen(next->d_name) + 1; |
| 3072 | if (item + len + 1 > roll + hold) { |
| 3073 | do { /* make roll bigger */ |
| 3074 | hold <<= 1; |
| 3075 | } while (item + len + 1 > roll + hold); |
| 3076 | bigger = realloc(roll, hold); |
| 3077 | if (bigger == NULL) { |
| 3078 | free(roll); |
| 3079 | bail("not enough memory", ""); |
| 3080 | } |
| 3081 | item = bigger + (item - roll); |
| 3082 | roll = bigger; |
| 3083 | } |
| 3084 | strcpy(item, next->d_name); |
| 3085 | item += len; |
| 3086 | *item = 0; |
| 3087 | } |
| 3088 | closedir(here); |
| 3089 | |
| 3090 | /* run process() for each entry in the directory */ |
| 3091 | cut = base = in + strlen(in); |
| 3092 | if (base > in && base[-1] != (unsigned char)'/') { |
| 3093 | if ((size_t)(base - in) >= sizeof(in)) |
| 3094 | bail("path too long", in); |
| 3095 | *base++ = '/'; |
| 3096 | } |
| 3097 | item = roll; |
| 3098 | while (*item) { |
| 3099 | strncpy(base, item, sizeof(in) - (base - in)); |
| 3100 | if (in[sizeof(in) - 1]) { |
| 3101 | strcpy(in + (sizeof(in) - 4), "..."); |
| 3102 | bail("path too long: ", in); |
| 3103 | } |
| 3104 | process(in); |
| 3105 | item += strlen(item) + 1; |
| 3106 | } |
| 3107 | *cut = 0; |
| 3108 | |
| 3109 | /* release list of entries */ |
| 3110 | free(roll); |
| 3111 | return; |
| 3112 | } |
| 3113 | |
| 3114 | /* don't compress .gz (or provided suffix) files, unless -f */ |
| 3115 | if (!(force || list || decode) && len >= strlen(sufx) && |
| 3116 | strcmp(in + len - strlen(sufx), sufx) == 0) { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3117 | complain("%s ends with %s -- skipping", in, sufx); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3118 | return; |
| 3119 | } |
| 3120 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3121 | /* only decompress over input file with compressed suffix */ |
| 3122 | if (decode && !pipeout) { |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3123 | int suf = compressed_suffix(in); |
| 3124 | if (suf == 0) { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3125 | complain("%s does not have compressed suffix -- skipping", in); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3126 | return; |
| 3127 | } |
| 3128 | len -= suf; |
| 3129 | } |
| 3130 | |
| 3131 | /* open input file */ |
| 3132 | ind = open(in, O_RDONLY, 0); |
| 3133 | if (ind < 0) |
| 3134 | bail("read error on ", in); |
| 3135 | |
| 3136 | /* prepare gzip header information for compression */ |
| 3137 | name = headis & 1 ? justname(in) : NULL; |
| 3138 | mtime = headis & 2 ? st.st_mtime : 0; |
| 3139 | } |
| 3140 | SET_BINARY_MODE(ind); |
| 3141 | |
| 3142 | /* if decoding or testing, try to read gzip header */ |
| 3143 | hname = NULL; |
| 3144 | if (decode) { |
| 3145 | in_init(); |
| 3146 | method = get_header(1); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3147 | if (method != 8 && method != 256 && |
| 3148 | /* gzip -cdf acts like cat on uncompressed input */ |
| 3149 | !(method == -2 && force && pipeout && decode != 2 && !list)) { |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3150 | RELEASE(hname); |
| 3151 | if (ind != 0) |
| 3152 | close(ind); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3153 | if (method != -1) |
| 3154 | complain(method < 0 ? "%s is not compressed -- skipping" : |
| 3155 | "%s has unknown compression method -- skipping", in); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3156 | return; |
| 3157 | } |
| 3158 | |
| 3159 | /* if requested, test input file (possibly a special list) */ |
| 3160 | if (decode == 2) { |
| 3161 | if (method == 8) |
| 3162 | infchk(); |
| 3163 | else { |
| 3164 | unlzw(); |
| 3165 | if (list) { |
| 3166 | in_tot -= 3; |
| 3167 | show_info(method, 0, out_tot, 0); |
| 3168 | } |
| 3169 | } |
| 3170 | RELEASE(hname); |
| 3171 | if (ind != 0) |
| 3172 | close(ind); |
| 3173 | return; |
| 3174 | } |
| 3175 | } |
| 3176 | |
| 3177 | /* if requested, just list information about input file */ |
| 3178 | if (list) { |
| 3179 | list_info(); |
| 3180 | RELEASE(hname); |
| 3181 | if (ind != 0) |
| 3182 | close(ind); |
| 3183 | return; |
| 3184 | } |
| 3185 | |
| 3186 | /* create output file out, descriptor outd */ |
| 3187 | if (path == NULL || pipeout) { |
| 3188 | /* write to stdout */ |
| 3189 | out = malloc(strlen("<stdout>") + 1); |
| 3190 | if (out == NULL) |
| 3191 | bail("not enough memory", ""); |
| 3192 | strcpy(out, "<stdout>"); |
| 3193 | outd = 1; |
| 3194 | if (!decode && !force && isatty(outd)) |
| 3195 | bail("trying to write compressed data to a terminal", |
| 3196 | " (use -f to force)"); |
| 3197 | } |
| 3198 | else { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3199 | char *to, *repl; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3200 | |
| 3201 | /* use header name for output when decompressing with -N */ |
| 3202 | to = in; |
| 3203 | if (decode && (headis & 1) != 0 && hname != NULL) { |
| 3204 | to = hname; |
| 3205 | len = strlen(hname); |
| 3206 | } |
| 3207 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3208 | /* replace .tgx with .tar when decoding */ |
| 3209 | repl = decode && strcmp(to + len, ".tgz") ? "" : ".tar"; |
| 3210 | |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3211 | /* create output file and open to write */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3212 | out = malloc(len + (decode ? strlen(repl) : strlen(sufx)) + 1); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3213 | if (out == NULL) |
| 3214 | bail("not enough memory", ""); |
| 3215 | memcpy(out, to, len); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3216 | strcpy(out + len, decode ? repl : sufx); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3217 | outd = open(out, O_CREAT | O_TRUNC | O_WRONLY | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3218 | (force ? 0 : O_EXCL), 0600); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3219 | |
| 3220 | /* if exists and not -f, give user a chance to overwrite */ |
| 3221 | if (outd < 0 && errno == EEXIST && isatty(0) && verbosity) { |
| 3222 | int ch, reply; |
| 3223 | |
| 3224 | fprintf(stderr, "%s exists -- overwrite (y/n)? ", out); |
| 3225 | fflush(stderr); |
| 3226 | reply = -1; |
| 3227 | do { |
| 3228 | ch = getchar(); |
| 3229 | if (reply < 0 && ch != ' ' && ch != '\t') |
| 3230 | reply = ch == 'y' || ch == 'Y' ? 1 : 0; |
| 3231 | } while (ch != EOF && ch != '\n' && ch != '\r'); |
| 3232 | if (reply == 1) |
| 3233 | outd = open(out, O_CREAT | O_TRUNC | O_WRONLY, |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3234 | 0600); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3235 | } |
| 3236 | |
| 3237 | /* if exists and no overwrite, report and go on to next */ |
| 3238 | if (outd < 0 && errno == EEXIST) { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3239 | complain("%s exists -- skipping", out); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3240 | RELEASE(out); |
| 3241 | RELEASE(hname); |
| 3242 | if (ind != 0) |
| 3243 | close(ind); |
| 3244 | return; |
| 3245 | } |
| 3246 | |
| 3247 | /* if some other error, give up */ |
| 3248 | if (outd < 0) |
| 3249 | bail("write error on ", out); |
| 3250 | } |
| 3251 | SET_BINARY_MODE(outd); |
| 3252 | RELEASE(hname); |
| 3253 | |
| 3254 | /* process ind to outd */ |
| 3255 | if (verbosity > 1) |
| 3256 | fprintf(stderr, "%s to %s ", in, out); |
| 3257 | if (decode) { |
| 3258 | if (method == 8) |
| 3259 | infchk(); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3260 | else if (method == 256) |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3261 | unlzw(); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3262 | else |
| 3263 | cat(); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3264 | } |
| 3265 | #ifndef NOTHREAD |
| 3266 | else if (procs > 1) |
| 3267 | parallel_compress(); |
| 3268 | #endif |
| 3269 | else |
| 3270 | single_compress(0); |
| 3271 | if (verbosity > 1) { |
| 3272 | putc('\n', stderr); |
| 3273 | fflush(stderr); |
| 3274 | } |
| 3275 | |
| 3276 | /* finish up, copy attributes, set times, delete original */ |
| 3277 | if (ind != 0) |
| 3278 | close(ind); |
| 3279 | if (outd != 1) { |
| 3280 | if (close(outd)) |
| 3281 | bail("write error on ", out); |
| 3282 | outd = -1; /* now prevent deletion on interrupt */ |
| 3283 | if (ind != 0) { |
| 3284 | copymeta(in, out); |
| 3285 | if (!keep) |
| 3286 | unlink(in); |
| 3287 | } |
| 3288 | if (decode && (headis & 2) != 0 && stamp) |
| 3289 | touch(out, stamp); |
| 3290 | } |
| 3291 | RELEASE(out); |
| 3292 | } |
| 3293 | |
| 3294 | local char *helptext[] = { |
| 3295 | "Usage: pigz [options] [files ...]", |
| 3296 | " will compress files in place, adding the suffix '.gz'. If no files are", |
| 3297 | #ifdef NOTHREAD |
| 3298 | " specified, stdin will be compressed to stdout. pigz does what gzip does.", |
| 3299 | #else |
| 3300 | " specified, stdin will be compressed to stdout. pigz does what gzip does,", |
| 3301 | " but spreads the work over multiple processors and cores when compressing.", |
| 3302 | #endif |
| 3303 | "", |
| 3304 | "Options:", |
| 3305 | " -0 to -9, --fast, --best Compression levels, --fast is -1, --best is -9", |
| 3306 | " -b, --blocksize mmm Set compression block size to mmmK (default 128K)", |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3307 | " -c, --stdout Write all processed output to stdout (won't delete)", |
| 3308 | " -d, --decompress Decompress the compressed input", |
| 3309 | " -f, --force Force overwrite, compress .gz, links, and to terminal", |
| 3310 | " -h, --help Display a help screen and quit", |
| 3311 | " -i, --independent Compress blocks independently for damage recovery", |
| 3312 | " -k, --keep Do not delete original file after processing", |
| 3313 | " -K, --zip Compress to PKWare zip (.zip) single entry format", |
| 3314 | " -l, --list List the contents of the compressed input", |
| 3315 | " -L, --license Display the pigz license and quit", |
| 3316 | " -n, --no-name Do not store or restore file name in/from header", |
| 3317 | " -N, --name Store/restore file name and mod time in/from header", |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3318 | #ifndef NOTHREAD |
| 3319 | " -p, --processes n Allow up to n compression threads (default is the", |
| 3320 | " number of online processors, or 8 if unknown)", |
| 3321 | #endif |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3322 | " -q, --quiet Print no messages, even on error", |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3323 | " -r, --recursive Process the contents of all subdirectories", |
| 3324 | " -R, --rsyncable Input-determined block locations for rsync", |
| 3325 | " -S, --suffix .sss Use suffix .sss instead of .gz (for compression)", |
| 3326 | " -t, --test Test the integrity of the compressed input", |
| 3327 | " -T, --no-time Do not store or restore mod time in/from header", |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3328 | #ifdef DEBUG |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3329 | " -v, --verbose Provide more verbose output (-vv to debug)", |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3330 | #else |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3331 | " -v, --verbose Provide more verbose output", |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3332 | #endif |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3333 | " -V --version Show the version of pigz", |
| 3334 | " -z, --zlib Compress to zlib (.zz) instead of gzip format", |
| 3335 | " -- All arguments after \"--\" are treated as files" |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3336 | }; |
| 3337 | |
| 3338 | /* display the help text above */ |
| 3339 | local void help(void) |
| 3340 | { |
| 3341 | int n; |
| 3342 | |
| 3343 | if (verbosity == 0) |
| 3344 | return; |
| 3345 | for (n = 0; n < (int)(sizeof(helptext) / sizeof(char *)); n++) |
| 3346 | fprintf(stderr, "%s\n", helptext[n]); |
| 3347 | fflush(stderr); |
| 3348 | exit(0); |
| 3349 | } |
| 3350 | |
| 3351 | #ifndef NOTHREAD |
| 3352 | |
| 3353 | /* try to determine the number of processors */ |
| 3354 | local int nprocs(int n) |
| 3355 | { |
| 3356 | # ifdef _SC_NPROCESSORS_ONLN |
| 3357 | n = (int)sysconf(_SC_NPROCESSORS_ONLN); |
| 3358 | # else |
| 3359 | # ifdef _SC_NPROC_ONLN |
| 3360 | n = (int)sysconf(_SC_NPROC_ONLN); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3361 | # else |
| 3362 | # ifdef __hpux |
| 3363 | struct pst_dynamic psd; |
| 3364 | |
| 3365 | if (pstat_getdynamic(&psd, sizeof(psd), (size_t)1, 0) != -1) |
| 3366 | n = psd.psd_proc_cnt; |
| 3367 | # endif |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3368 | # endif |
| 3369 | # endif |
| 3370 | return n; |
| 3371 | } |
| 3372 | |
| 3373 | #endif |
| 3374 | |
| 3375 | /* set option defaults */ |
| 3376 | local void defaults(void) |
| 3377 | { |
| 3378 | level = Z_DEFAULT_COMPRESSION; |
| 3379 | #ifdef NOTHREAD |
| 3380 | procs = 1; |
| 3381 | #else |
| 3382 | procs = nprocs(8); |
| 3383 | #endif |
| 3384 | size = 131072UL; |
| 3385 | rsync = 0; /* don't do rsync blocking */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3386 | setdict = 1; /* initialize dictionary each thread */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3387 | verbosity = 1; /* normal message level */ |
| 3388 | headis = 3; /* store/restore name and timestamp */ |
| 3389 | pipeout = 0; /* don't force output to stdout */ |
| 3390 | sufx = ".gz"; /* compressed file suffix */ |
| 3391 | decode = 0; /* compress */ |
| 3392 | list = 0; /* compress */ |
| 3393 | keep = 0; /* delete input file once compressed */ |
| 3394 | force = 0; /* don't overwrite, don't compress links */ |
| 3395 | recurse = 0; /* don't go into directories */ |
| 3396 | form = 0; /* use gzip format */ |
| 3397 | } |
| 3398 | |
| 3399 | /* long options conversion to short options */ |
| 3400 | local char *longopts[][2] = { |
| 3401 | {"LZW", "Z"}, {"ascii", "a"}, {"best", "9"}, {"bits", "Z"}, |
| 3402 | {"blocksize", "b"}, {"decompress", "d"}, {"fast", "1"}, {"force", "f"}, |
| 3403 | {"help", "h"}, {"independent", "i"}, {"keep", "k"}, {"license", "L"}, |
| 3404 | {"list", "l"}, {"name", "N"}, {"no-name", "n"}, {"no-time", "T"}, |
| 3405 | {"processes", "p"}, {"quiet", "q"}, {"recursive", "r"}, {"rsyncable", "R"}, |
| 3406 | {"silent", "q"}, {"stdout", "c"}, {"suffix", "S"}, {"test", "t"}, |
| 3407 | {"to-stdout", "c"}, {"uncompress", "d"}, {"verbose", "v"}, |
| 3408 | {"version", "V"}, {"zip", "K"}, {"zlib", "z"}}; |
| 3409 | #define NLOPTS (sizeof(longopts) / (sizeof(char *) << 1)) |
| 3410 | |
| 3411 | /* either new buffer size, new compression level, or new number of processes -- |
| 3412 | get rid of old buffers and threads to force the creation of new ones with |
| 3413 | the new settings */ |
| 3414 | local void new_opts(void) |
| 3415 | { |
| 3416 | single_compress(1); |
| 3417 | #ifndef NOTHREAD |
| 3418 | finish_jobs(); |
| 3419 | #endif |
| 3420 | } |
| 3421 | |
| 3422 | /* verify that arg is only digits, and if so, return the decimal value */ |
| 3423 | local size_t num(char *arg) |
| 3424 | { |
| 3425 | char *str = arg; |
| 3426 | size_t val = 0; |
| 3427 | |
| 3428 | if (*str == 0) |
| 3429 | bail("internal error: empty parameter", ""); |
| 3430 | do { |
| 3431 | if (*str < '0' || *str > '9') |
| 3432 | bail("invalid numeric parameter: ", arg); |
| 3433 | val = val * 10 + (*str - '0'); |
| 3434 | /* %% need to detect overflow here */ |
| 3435 | } while (*++str); |
| 3436 | return val; |
| 3437 | } |
| 3438 | |
| 3439 | /* process an option, return true if a file name and not an option */ |
| 3440 | local int option(char *arg) |
| 3441 | { |
| 3442 | static int get = 0; /* if not zero, look for option parameter */ |
| 3443 | char bad[3] = "-X"; /* for error messages (X is replaced) */ |
| 3444 | |
| 3445 | /* if no argument or dash option, check status of get */ |
| 3446 | if (get && (arg == NULL || *arg == '-')) { |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3447 | bad[1] = "bpS"[get - 1]; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3448 | bail("missing parameter after ", bad); |
| 3449 | } |
| 3450 | if (arg == NULL) |
| 3451 | return 0; |
| 3452 | |
| 3453 | /* process long option or short options */ |
| 3454 | if (*arg == '-') { |
| 3455 | /* a single dash will be interpreted as stdin */ |
| 3456 | if (*++arg == 0) |
| 3457 | return 1; |
| 3458 | |
| 3459 | /* process long option (fall through with equivalent short option) */ |
| 3460 | if (*arg == '-') { |
| 3461 | int j; |
| 3462 | |
| 3463 | arg++; |
| 3464 | for (j = NLOPTS - 1; j >= 0; j--) |
| 3465 | if (strcmp(arg, longopts[j][0]) == 0) { |
| 3466 | arg = longopts[j][1]; |
| 3467 | break; |
| 3468 | } |
| 3469 | if (j < 0) |
| 3470 | bail("invalid option: ", arg - 2); |
| 3471 | } |
| 3472 | |
| 3473 | /* process short options (more than one allowed after dash) */ |
| 3474 | do { |
| 3475 | /* if looking for a parameter, don't process more single character |
| 3476 | options until we have the parameter */ |
| 3477 | if (get) { |
| 3478 | if (get == 3) |
| 3479 | bail("invalid usage: -s must be followed by space", ""); |
| 3480 | break; /* allow -pnnn and -bnnn, fall to parameter code */ |
| 3481 | } |
| 3482 | |
| 3483 | /* process next single character option */ |
| 3484 | bad[1] = *arg; |
| 3485 | switch (*arg) { |
| 3486 | case '0': case '1': case '2': case '3': case '4': |
| 3487 | case '5': case '6': case '7': case '8': case '9': |
| 3488 | level = *arg - '0'; |
| 3489 | new_opts(); |
| 3490 | break; |
| 3491 | case 'K': form = 2; sufx = ".zip"; break; |
| 3492 | case 'L': |
| 3493 | fputs(VERSION, stderr); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3494 | fputs("Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012" |
| 3495 | " Mark Adler\n", |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3496 | stderr); |
| 3497 | fputs("Subject to the terms of the zlib license.\n", |
| 3498 | stderr); |
| 3499 | fputs("No warranty is provided or implied.\n", stderr); |
| 3500 | exit(0); |
| 3501 | case 'N': headis = 3; break; |
| 3502 | case 'T': headis &= ~2; break; |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3503 | case 'R': rsync = 1; break; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3504 | case 'S': get = 3; break; |
| 3505 | case 'V': fputs(VERSION, stderr); exit(0); |
| 3506 | case 'Z': |
| 3507 | bail("invalid option: LZW output not supported: ", bad); |
| 3508 | case 'a': |
| 3509 | bail("invalid option: ascii conversion not supported: ", bad); |
| 3510 | case 'b': get = 1; break; |
| 3511 | case 'c': pipeout = 1; break; |
| 3512 | case 'd': decode = 1; headis = 0; break; |
| 3513 | case 'f': force = 1; break; |
| 3514 | case 'h': help(); break; |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3515 | case 'i': setdict = 0; break; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3516 | case 'k': keep = 1; break; |
| 3517 | case 'l': list = 1; break; |
| 3518 | case 'n': headis &= ~1; break; |
| 3519 | case 'p': get = 2; break; |
| 3520 | case 'q': verbosity = 0; break; |
| 3521 | case 'r': recurse = 1; break; |
| 3522 | case 't': decode = 2; break; |
| 3523 | case 'v': verbosity++; break; |
| 3524 | case 'z': form = 1; sufx = ".zz"; break; |
| 3525 | default: |
| 3526 | bail("invalid option: ", bad); |
| 3527 | } |
| 3528 | } while (*++arg); |
| 3529 | if (*arg == 0) |
| 3530 | return 0; |
| 3531 | } |
| 3532 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3533 | /* process option parameter for -b, -p, or -S */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3534 | if (get) { |
| 3535 | size_t n; |
| 3536 | |
| 3537 | if (get == 1) { |
| 3538 | n = num(arg); |
| 3539 | size = n << 10; /* chunk size */ |
| 3540 | if (size < DICT) |
| 3541 | bail("block size too small (must be >= 32K)", ""); |
| 3542 | if (n != size >> 10 || |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3543 | OUTPOOL(size) < size || |
| 3544 | (ssize_t)OUTPOOL(size) < 0 || |
| 3545 | size > (1UL << 22)) |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3546 | bail("block size too large: ", arg); |
| 3547 | new_opts(); |
| 3548 | } |
| 3549 | else if (get == 2) { |
| 3550 | n = num(arg); |
| 3551 | procs = (int)n; /* # processes */ |
| 3552 | if (procs < 1) |
| 3553 | bail("invalid number of processes: ", arg); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3554 | if ((size_t)procs != n || INBUFS(procs) < 1) |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3555 | bail("too many processes: ", arg); |
| 3556 | #ifdef NOTHREAD |
| 3557 | if (procs > 1) |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3558 | bail("compiled without threads", ""); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3559 | #endif |
| 3560 | new_opts(); |
| 3561 | } |
| 3562 | else if (get == 3) |
| 3563 | sufx = arg; /* gz suffix */ |
| 3564 | get = 0; |
| 3565 | return 0; |
| 3566 | } |
| 3567 | |
| 3568 | /* neither an option nor parameter */ |
| 3569 | return 1; |
| 3570 | } |
| 3571 | |
| 3572 | /* catch termination signal */ |
| 3573 | local void cut_short(int sig) |
| 3574 | { |
| 3575 | (void)sig; |
| 3576 | Trace(("termination by user")); |
| 3577 | if (outd != -1 && out != NULL) |
| 3578 | unlink(out); |
| 3579 | log_dump(); |
| 3580 | _exit(1); |
| 3581 | } |
| 3582 | |
| 3583 | /* Process arguments, compress in the gzip format. Note that procs must be at |
| 3584 | least two in order to provide a dictionary in one work unit for the other |
| 3585 | work unit, and that size must be at least 32K to store a full dictionary. */ |
| 3586 | int main(int argc, char **argv) |
| 3587 | { |
| 3588 | int n; /* general index */ |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3589 | int noop; /* true to suppress option decoding */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3590 | unsigned long done; /* number of named files processed */ |
| 3591 | char *opts, *p; /* environment default options, marker */ |
| 3592 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3593 | /* save pointer to program name for error messages */ |
| 3594 | p = strrchr(argv[0], '/'); |
| 3595 | p = p == NULL ? argv[0] : p + 1; |
| 3596 | prog = *p ? p : "pigz"; |
| 3597 | |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3598 | /* prepare for interrupts and logging */ |
| 3599 | signal(SIGINT, cut_short); |
| 3600 | #ifndef NOTHREAD |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3601 | yarn_prefix = prog; /* prefix for yarn error messages */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3602 | yarn_abort = cut_short; /* call on thread error */ |
| 3603 | #endif |
| 3604 | #ifdef DEBUG |
| 3605 | gettimeofday(&start, NULL); /* starting time for log entries */ |
| 3606 | log_init(); /* initialize logging */ |
| 3607 | #endif |
| 3608 | |
| 3609 | /* set all options to defaults */ |
| 3610 | defaults(); |
| 3611 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3612 | /* process user environment variable defaults in GZIP */ |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3613 | opts = getenv("GZIP"); |
| 3614 | if (opts != NULL) { |
| 3615 | while (*opts) { |
| 3616 | while (*opts == ' ' || *opts == '\t') |
| 3617 | opts++; |
| 3618 | p = opts; |
| 3619 | while (*p && *p != ' ' && *p != '\t') |
| 3620 | p++; |
| 3621 | n = *p; |
| 3622 | *p = 0; |
| 3623 | if (option(opts)) |
| 3624 | bail("cannot provide files in GZIP environment variable", ""); |
| 3625 | opts = p + (n ? 1 : 0); |
| 3626 | } |
| 3627 | option(NULL); |
| 3628 | } |
| 3629 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3630 | /* process user environment variable defaults in PIGZ as well */ |
| 3631 | opts = getenv("PIGZ"); |
| 3632 | if (opts != NULL) { |
| 3633 | while (*opts) { |
| 3634 | while (*opts == ' ' || *opts == '\t') |
| 3635 | opts++; |
| 3636 | p = opts; |
| 3637 | while (*p && *p != ' ' && *p != '\t') |
| 3638 | p++; |
| 3639 | n = *p; |
| 3640 | *p = 0; |
| 3641 | if (option(opts)) |
| 3642 | bail("cannot provide files in PIGZ environment variable", ""); |
| 3643 | opts = p + (n ? 1 : 0); |
| 3644 | } |
| 3645 | option(NULL); |
| 3646 | } |
| 3647 | |
| 3648 | /* decompress if named "unpigz" or "gunzip", to stdout if "*cat" */ |
| 3649 | if (strcmp(prog, "unpigz") == 0 || strcmp(prog, "gunzip") == 0) |
| 3650 | decode = 1, headis = 0; |
| 3651 | if ((n = strlen(prog)) > 2 && strcmp(prog + n - 3, "cat") == 0) |
| 3652 | decode = 1, headis = 0, pipeout = 1; |
| 3653 | |
| 3654 | /* if no arguments and compressed data to or from a terminal, show help */ |
| 3655 | if (argc < 2 && isatty(decode ? 0 : 1)) |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3656 | help(); |
| 3657 | |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3658 | /* process command-line arguments, no options after "--" */ |
| 3659 | done = noop = 0; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3660 | for (n = 1; n < argc; n++) |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3661 | if (noop == 0 && strcmp(argv[n], "--") == 0) { |
| 3662 | noop = 1; |
| 3663 | option(NULL); |
| 3664 | } |
| 3665 | else if (noop || option(argv[n])) { /* true if file name, process it */ |
| 3666 | if (done == 1 && pipeout && !decode && !list && form > 1) |
| 3667 | complain("warning: output will be concatenated zip files -- " |
| 3668 | "will not be able to extract"); |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3669 | process(strcmp(argv[n], "-") ? argv[n] : NULL); |
| 3670 | done++; |
| 3671 | } |
| 3672 | option(NULL); |
| 3673 | |
| 3674 | /* list stdin or compress stdin to stdout if no file names provided */ |
| 3675 | if (done == 0) |
| 3676 | process(NULL); |
| 3677 | |
| 3678 | /* done -- release resources, show log */ |
| 3679 | new_opts(); |
| 3680 | log_dump(); |
Dees_Troy | 3bde123 | 2012-09-22 08:10:28 -0400 | [diff] [blame] | 3681 | return warned ? 2 : 0; |
Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 3682 | } |