Dees_Troy | 51a0e82 | 2012-09-05 15:24:24 -0400 | [diff] [blame] | 1 | /* pigz.c -- parallel implementation of gzip |
| 2 | * Copyright (C) 2007, 2008, 2009, 2010 Mark Adler |
| 3 | * Version 2.1.6 17 Jan 2010 Mark Adler |
| 4 | */ |
| 5 | |
| 6 | /* |
| 7 | This software is provided 'as-is', without any express or implied |
| 8 | warranty. In no event will the author be held liable for any damages |
| 9 | arising from the use of this software. |
| 10 | |
| 11 | Permission is granted to anyone to use this software for any purpose, |
| 12 | including commercial applications, and to alter it and redistribute it |
| 13 | freely, subject to the following restrictions: |
| 14 | |
| 15 | 1. The origin of this software must not be misrepresented; you must not |
| 16 | claim that you wrote the original software. If you use this software |
| 17 | in a product, an acknowledgment in the product documentation would be |
| 18 | appreciated but is not required. |
| 19 | 2. Altered source versions must be plainly marked as such, and must not be |
| 20 | misrepresented as being the original software. |
| 21 | 3. This notice may not be removed or altered from any source distribution. |
| 22 | |
| 23 | Mark Adler |
| 24 | madler@alumni.caltech.edu |
| 25 | |
| 26 | Mark accepts donations for providing this software. Donations are not |
| 27 | required or expected. Any amount that you feel is appropriate would be |
| 28 | appreciated. You can use this link: |
| 29 | |
| 30 | https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=536055 |
| 31 | |
| 32 | */ |
| 33 | |
| 34 | /* Version history: |
| 35 | 1.0 17 Jan 2007 First version, pipe only |
| 36 | 1.1 28 Jan 2007 Avoid void * arithmetic (some compilers don't get that) |
| 37 | Add note about requiring zlib 1.2.3 |
| 38 | Allow compression level 0 (no compression) |
| 39 | Completely rewrite parallelism -- add a write thread |
| 40 | Use deflateSetDictionary() to make use of history |
| 41 | Tune argument defaults to best performance on four cores |
| 42 | 1.2.1 1 Feb 2007 Add long command line options, add all gzip options |
| 43 | Add debugging options |
| 44 | 1.2.2 19 Feb 2007 Add list (--list) function |
| 45 | Process file names on command line, write .gz output |
| 46 | Write name and time in gzip header, set output file time |
| 47 | Implement all command line options except --recursive |
| 48 | Add --keep option to prevent deleting input files |
| 49 | Add thread tracing information with -vv used |
| 50 | Copy crc32_combine() from zlib (possible thread issue) |
| 51 | 1.3 25 Feb 2007 Implement --recursive |
| 52 | Expand help to show all options |
| 53 | Show help if no arguments or output piping are provided |
| 54 | Process options in GZIP environment variable |
| 55 | Add progress indicator to write thread if --verbose |
| 56 | 1.4 4 Mar 2007 Add --independent to facilitate damaged file recovery |
| 57 | Reallocate jobs for new --blocksize or --processes |
| 58 | Do not delete original if writing to stdout |
| 59 | Allow --processes 1, which does no threading |
| 60 | Add NOTHREAD define to compile without threads |
| 61 | Incorporate license text from zlib in source code |
| 62 | 1.5 25 Mar 2007 Reinitialize jobs for new compression level |
| 63 | Copy attributes and owner from input file to output file |
| 64 | Add decompression and testing |
| 65 | Add -lt (or -ltv) to show all entries and proper lengths |
| 66 | Add decompression, testing, listing of LZW (.Z) files |
| 67 | Only generate and show trace log if DEBUG defined |
| 68 | Take "-" argument to mean read file from stdin |
| 69 | 1.6 30 Mar 2007 Add zlib stream compression (--zlib), and decompression |
| 70 | 1.7 29 Apr 2007 Decompress first entry of a zip file (if deflated) |
| 71 | Avoid empty deflate blocks at end of deflate stream |
| 72 | Show zlib check value (Adler-32) when listing |
| 73 | Don't complain when decompressing empty file |
| 74 | Warn about trailing junk for gzip and zlib streams |
| 75 | Make listings consistent, ignore gzip extra flags |
| 76 | Add zip stream compression (--zip) |
| 77 | 1.8 13 May 2007 Document --zip option in help output |
| 78 | 2.0 19 Oct 2008 Complete rewrite of thread usage and synchronization |
| 79 | Use polling threads and a pool of memory buffers |
| 80 | Remove direct pthread library use, hide in yarn.c |
| 81 | 2.0.1 20 Oct 2008 Check version of zlib at compile time, need >= 1.2.3 |
| 82 | 2.1 24 Oct 2008 Decompress with read, write, inflate, and check threads |
| 83 | Remove spurious use of ctime_r(), ctime() more portable |
| 84 | Change application of job->calc lock to be a semaphore |
| 85 | Detect size of off_t at run time to select %lu vs. %llu |
| 86 | #define large file support macro even if not __linux__ |
| 87 | Remove _LARGEFILE64_SOURCE, _FILE_OFFSET_BITS is enough |
| 88 | Detect file-too-large error and report, blame build |
| 89 | Replace check combination routines with those from zlib |
| 90 | 2.1.1 28 Oct 2008 Fix a leak for files with an integer number of blocks |
| 91 | Update for yarn 1.1 (yarn_prefix and yarn_abort) |
| 92 | 2.1.2 30 Oct 2008 Work around use of beta zlib in production systems |
| 93 | 2.1.3 8 Nov 2008 Don't use zlib combination routines, put back in pigz |
| 94 | 2.1.4 9 Nov 2008 Fix bug when decompressing very short files |
| 95 | 2.1.5 20 Jul 2009 Added 2008, 2009 to --license statement |
| 96 | Allow numeric parameter immediately after -p or -b |
| 97 | Enforce parameter after -p, -b, -s, before other options |
| 98 | Enforce numeric parameters to have only numeric digits |
| 99 | Try to determine the number of processors for -p default |
| 100 | Fix --suffix short option to be -S to match gzip [Bloch] |
| 101 | Decompress if executable named "unpigz" [Amundsen] |
| 102 | Add a little bit of testing to Makefile |
| 103 | 2.1.6 17 Jan 2010 Added pigz.spec to distribution for RPM systems [Brown] |
| 104 | Avoid some compiler warnings |
| 105 | Process symbolic links if piping to stdout [Hoffstätte] |
| 106 | Decompress if executable named "gunzip" [Hoffstätte] |
| 107 | Allow ".tgz" suffix [Chernookiy] |
| 108 | Fix adler32 comparison on .zz files |
| 109 | */ |
| 110 | |
| 111 | #define VERSION "pigz 2.1.6\n" |
| 112 | |
| 113 | /* To-do: |
| 114 | - add --rsyncable (or -R) [use my own algorithm, set min/max block size] |
| 115 | - make source portable for Windows, VMS, etc. (see gzip source code) |
| 116 | - make build portable (currently good for Unixish) |
| 117 | - add bzip2 decompression |
| 118 | */ |
| 119 | |
| 120 | /* |
| 121 | pigz compresses using threads to make use of multiple processors and cores. |
| 122 | The input is broken up into 128 KB chunks with each compressed in parallel. |
| 123 | The individual check value for each chunk is also calculated in parallel. |
| 124 | The compressed data is written in order to the output, and a combined check |
| 125 | value is calculated from the individual check values. |
| 126 | |
| 127 | The compressed data format generated is in the gzip, zlib, or single-entry |
| 128 | zip format using the deflate compression method. The compression produces |
| 129 | partial raw deflate streams which are concatenated by a single write thread |
| 130 | and wrapped with the appropriate header and trailer, where the trailer |
| 131 | contains the combined check value. |
| 132 | |
| 133 | Each partial raw deflate stream is terminated by an empty stored block |
| 134 | (using the Z_SYNC_FLUSH option of zlib), in order to end that partial bit |
| 135 | stream at a byte boundary. That allows the partial streams to be |
| 136 | concatenated simply as sequences of bytes. This adds a very small four to |
| 137 | five byte overhead to the output for each input chunk. |
| 138 | |
| 139 | The default input block size is 128K, but can be changed with the -b option. |
| 140 | The number of compress threads is set by default to 8, which can be changed |
| 141 | using the -p option. Specifying -p 1 avoids the use of threads entirely. |
| 142 | pigz will try to determine the number of processors in the machine, in which |
| 143 | case if that number is two or greater, pigz will use that as the default for |
| 144 | -p instead of 8. |
| 145 | |
| 146 | The input blocks, while compressed independently, have the last 32K of the |
| 147 | previous block loaded as a preset dictionary to preserve the compression |
| 148 | effectiveness of deflating in a single thread. This can be turned off using |
| 149 | the --independent or -i option, so that the blocks can be decompressed |
| 150 | independently for partial error recovery or for random access. |
| 151 | |
| 152 | Decompression can't be parallelized, at least not without specially prepared |
| 153 | deflate streams for that purpose. As a result, pigz uses a single thread |
| 154 | (the main thread) for decompression, but will create three other threads for |
| 155 | reading, writing, and check calculation, which can speed up decompression |
| 156 | under some circumstances. Parallel decompression can be turned off by |
| 157 | specifying one process (-dp 1 or -tp 1). |
| 158 | |
| 159 | pigz requires zlib 1.2.1 or later to allow setting the dictionary when doing |
| 160 | raw deflate. Since zlib 1.2.3 corrects security vulnerabilities in zlib |
| 161 | version 1.2.1 and 1.2.2, conditionals check for zlib 1.2.3 or later during |
| 162 | the compilation of pigz.c. zlib 1.2.4 includes some improvements to |
| 163 | Z_FULL_FLUSH and deflateSetDictionary() that permit identical output for |
| 164 | pigz with and without threads, which is not possible with zlib 1.2.3. This |
| 165 | may be important for uses of pigz -R where small changes in the contents |
| 166 | should result in small changes in the archive for rsync. Note that due to |
| 167 | the details of how the lower levels of compression result in greater speed, |
| 168 | compression level 3 and below does not permit identical pigz output with |
| 169 | and without threads. |
| 170 | |
| 171 | pigz uses the POSIX pthread library for thread control and communication, |
| 172 | through the yarn.h interface to yarn.c. yarn.c can be replaced with |
| 173 | equivalent implementations using other thread libraries. pigz can be |
| 174 | compiled with NOTHREAD #defined to not use threads at all (in which case |
| 175 | pigz will not be able to live up to the "parallel" in its name). |
| 176 | */ |
| 177 | |
| 178 | /* |
| 179 | Details of parallel compression implementation: |
| 180 | |
| 181 | When doing parallel compression, pigz uses the main thread to read the input |
| 182 | in 'size' sized chunks (see -b), and puts those in a compression job list, |
| 183 | each with a sequence number to keep track of the ordering. If it is not the |
| 184 | first chunk, then that job also points to the previous input buffer, from |
| 185 | which the last 32K will be used as a dictionary (unless -i is specified). |
| 186 | This sets a lower limit of 32K on 'size'. |
| 187 | |
| 188 | pigz launches up to 'procs' compression threads (see -p). Each compression |
| 189 | thread continues to look for jobs in the compression list and perform those |
| 190 | jobs until instructed to return. When a job is pulled, the dictionary, if |
| 191 | provided, will be loaded into the deflate engine and then that input buffer |
| 192 | is dropped for reuse. Then the input data is compressed into an output |
| 193 | buffer sized to assure that it can contain maximally expanded deflate data. |
| 194 | The job is then put into the write job list, sorted by the sequence number. |
| 195 | The compress thread however continues to calculate the check value on the |
| 196 | input data, either a CRC-32 or Adler-32, possibly in parallel with the write |
| 197 | thread writing the output data. Once that's done, the compress thread drops |
| 198 | the input buffer and also releases the lock on the check value so that the |
| 199 | write thread can combine it with the previous check values. The compress |
| 200 | thread has then completed that job, and goes to look for another. |
| 201 | |
| 202 | All of the compress threads are left running and waiting even after the last |
| 203 | chunk is processed, so that they can support the next input to be compressed |
| 204 | (more than one input file on the command line). Once pigz is done, it will |
| 205 | call all the compress threads home (that'll do pig, that'll do). |
| 206 | |
| 207 | Before starting to read the input, the main thread launches the write thread |
| 208 | so that it is ready pick up jobs immediately. The compress thread puts the |
| 209 | write jobs in the list in sequence sorted order, so that the first job in |
| 210 | the list is always has the lowest sequence number. The write thread waits |
| 211 | for the next write job in sequence, and then gets that job. The job still |
| 212 | holds its input buffer, from which the write thread gets the input buffer |
| 213 | length for use in check value combination. Then the write thread drops that |
| 214 | input buffer to allow its reuse. Holding on to the input buffer until the |
| 215 | write thread starts also has the benefit that the read and compress threads |
| 216 | can't get way ahead of the write thread and build up a large backlog of |
| 217 | unwritten compressed data. The write thread will write the compressed data, |
| 218 | drop the output buffer, and then wait for the check value to be unlocked |
| 219 | by the compress thread. Then the write thread combines the check value for |
| 220 | this chunk with the total check value for eventual use in the trailer. If |
| 221 | this is not the last chunk, the write thread then goes back to look for the |
| 222 | next output chunk in sequence. After the last chunk, the write thread |
| 223 | returns and joins the main thread. Unlike the compress threads, a new write |
| 224 | thread is launched for each input stream. The write thread writes the |
| 225 | appropriate header and trailer around the compressed data. |
| 226 | |
| 227 | The input and output buffers are reused through their collection in pools. |
| 228 | Each buffer has a use count, which when decremented to zero returns the |
| 229 | buffer to the respective pool. Each input buffer has up to three parallel |
| 230 | uses: as the input for compression, as the data for the check value |
| 231 | calculation, and as a dictionary for compression. Each output buffer has |
| 232 | only one use, which is as the output of compression followed serially as |
| 233 | data to be written. The input pool is limited in the number of buffers, so |
| 234 | that reading does not get way ahead of compression and eat up memory with |
| 235 | more input than can be used. The limit is approximately two times the |
| 236 | number of compression threads. In the case that reading is fast as compared |
| 237 | to compression, that number allows a second set of buffers to be read while |
| 238 | the first set of compressions are being performed. The number of output |
| 239 | buffers is not directly limited, but is indirectly limited by the release of |
| 240 | input buffers to the same number. |
| 241 | */ |
| 242 | |
| 243 | /* use large file functions if available */ |
| 244 | #define _FILE_OFFSET_BITS 64 |
| 245 | |
| 246 | /* included headers and what is expected from each */ |
| 247 | #include <stdio.h> /* fflush(), fprintf(), fputs(), getchar(), putc(), */ |
| 248 | /* puts(), printf(), vasprintf(), stderr, EOF, NULL, |
| 249 | SEEK_END, size_t, off_t */ |
| 250 | #include <stdlib.h> /* exit(), malloc(), free(), realloc(), atol(), */ |
| 251 | /* atoi(), getenv() */ |
| 252 | #include <stdarg.h> /* va_start(), va_end(), va_list */ |
| 253 | #include <string.h> /* memset(), memchr(), memcpy(), strcmp(), strcpy() */ |
| 254 | /* strncpy(), strlen(), strcat(), strrchr() */ |
| 255 | #include <errno.h> /* errno, EEXIST */ |
| 256 | #include <assert.h> /* assert() */ |
| 257 | #include <time.h> /* ctime(), time(), time_t, mktime() */ |
| 258 | #include <signal.h> /* signal(), SIGINT */ |
| 259 | #include <sys/types.h> /* ssize_t */ |
| 260 | #include <sys/stat.h> /* chmod(), stat(), fstat(), lstat(), struct stat, */ |
| 261 | /* S_IFDIR, S_IFLNK, S_IFMT, S_IFREG */ |
| 262 | #include <sys/time.h> /* utimes(), gettimeofday(), struct timeval */ |
| 263 | #include <unistd.h> /* unlink(), _exit(), read(), write(), close(), */ |
| 264 | /* lseek(), isatty(), chown() */ |
| 265 | #include <fcntl.h> /* open(), O_CREAT, O_EXCL, O_RDONLY, O_TRUNC, */ |
| 266 | /* O_WRONLY */ |
| 267 | #include <dirent.h> /* opendir(), readdir(), closedir(), DIR, */ |
| 268 | /* struct dirent */ |
| 269 | #include <limits.h> /* PATH_MAX */ |
| 270 | |
| 271 | #include "zlib.h" /* deflateInit2(), deflateReset(), deflate(), */ |
| 272 | /* deflateEnd(), deflateSetDictionary(), crc32(), |
| 273 | inflateBackInit(), inflateBack(), inflateBackEnd(), |
| 274 | Z_DEFAULT_COMPRESSION, Z_DEFAULT_STRATEGY, |
| 275 | Z_DEFLATED, Z_NO_FLUSH, Z_NULL, Z_OK, |
| 276 | Z_SYNC_FLUSH, z_stream */ |
| 277 | #if !defined(ZLIB_VERNUM) || ZLIB_VERNUM < 0x1230 |
| 278 | # error Need zlib version 1.2.3 or later |
| 279 | #endif |
| 280 | |
| 281 | #ifndef NOTHREAD |
| 282 | # include "yarn.h" /* thread, launch(), join(), join_all(), */ |
| 283 | /* lock, new_lock(), possess(), twist(), wait_for(), |
| 284 | release(), peek_lock(), free_lock(), yarn_name */ |
| 285 | #endif |
| 286 | |
| 287 | /* for local functions and globals */ |
| 288 | #define local static |
| 289 | |
| 290 | /* prevent end-of-line conversions on MSDOSish operating systems */ |
| 291 | #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) |
| 292 | # include <io.h> /* setmode(), O_BINARY */ |
| 293 | # define SET_BINARY_MODE(fd) setmode(fd, O_BINARY) |
| 294 | #else |
| 295 | # define SET_BINARY_MODE(fd) |
| 296 | #endif |
| 297 | |
| 298 | /* release an allocated pointer, if allocated, and mark as unallocated */ |
| 299 | #define RELEASE(ptr) \ |
| 300 | do { \ |
| 301 | if ((ptr) != NULL) { \ |
| 302 | free(ptr); \ |
| 303 | ptr = NULL; \ |
| 304 | } \ |
| 305 | } while (0) |
| 306 | |
| 307 | /* globals (modified by main thread only when it's the only thread) */ |
| 308 | local int ind; /* input file descriptor */ |
| 309 | local int outd; /* output file descriptor */ |
| 310 | local char in[PATH_MAX+1]; /* input file name (accommodate recursion) */ |
| 311 | local char *out = NULL; /* output file name (allocated if not NULL) */ |
| 312 | local int verbosity; /* 0 = quiet, 1 = normal, 2 = verbose, 3 = trace */ |
| 313 | local int headis; /* 1 to store name, 2 to store date, 3 both */ |
| 314 | local int pipeout; /* write output to stdout even if file */ |
| 315 | local int keep; /* true to prevent deletion of input file */ |
| 316 | local int force; /* true to overwrite, compress links */ |
| 317 | local int form; /* gzip = 0, zlib = 1, zip = 2 or 3 */ |
| 318 | local int recurse; /* true to dive down into directory structure */ |
| 319 | local char *sufx; /* suffix to use (".gz" or user supplied) */ |
| 320 | local char *name; /* name for gzip header */ |
| 321 | local time_t mtime; /* time stamp from input file for gzip header */ |
| 322 | local int list; /* true to list files instead of compress */ |
| 323 | local int first = 1; /* true if we need to print listing header */ |
| 324 | local int decode; /* 0 to compress, 1 to decompress, 2 to test */ |
| 325 | local int level; /* compression level */ |
| 326 | local int rsync; /* true for rsync blocking */ |
| 327 | local int procs; /* maximum number of compression threads (>= 1) */ |
| 328 | local int dict; /* true to initialize dictionary in each thread */ |
| 329 | local size_t size; /* uncompressed input size per thread (>= 32K) */ |
| 330 | |
| 331 | /* saved gzip/zip header data for decompression, testing, and listing */ |
| 332 | local time_t stamp; /* time stamp from gzip header */ |
| 333 | local char *hname = NULL; /* name from header (allocated) */ |
| 334 | local unsigned long zip_crc; /* local header crc */ |
| 335 | local unsigned long zip_clen; /* local header compressed length */ |
| 336 | local unsigned long zip_ulen; /* local header uncompressed length */ |
| 337 | |
| 338 | /* exit with error, delete output file if in the middle of writing it */ |
| 339 | local int bail(char *why, char *what) |
| 340 | { |
| 341 | if (outd != -1 && out != NULL) |
| 342 | unlink(out); |
| 343 | if (verbosity > 0) |
| 344 | fprintf(stderr, "pigz abort: %s%s\n", why, what); |
| 345 | exit(1); |
| 346 | return 0; |
| 347 | } |
| 348 | |
| 349 | #ifdef DEBUG |
| 350 | |
| 351 | /* starting time of day for tracing */ |
| 352 | local struct timeval start; |
| 353 | |
| 354 | /* trace log */ |
| 355 | local struct log { |
| 356 | struct timeval when; /* time of entry */ |
| 357 | char *msg; /* message */ |
| 358 | struct log *next; /* next entry */ |
| 359 | } *log_head, **log_tail = NULL; |
| 360 | #ifndef NOTHREAD |
| 361 | local lock *log_lock = NULL; |
| 362 | #endif |
| 363 | |
| 364 | /* maximum log entry length */ |
| 365 | #define MAXMSG 256 |
| 366 | |
| 367 | /* set up log (call from main thread before other threads launched) */ |
| 368 | local void log_init(void) |
| 369 | { |
| 370 | if (log_tail == NULL) { |
| 371 | #ifndef NOTHREAD |
| 372 | log_lock = new_lock(0); |
| 373 | #endif |
| 374 | log_head = NULL; |
| 375 | log_tail = &log_head; |
| 376 | } |
| 377 | } |
| 378 | |
| 379 | /* add entry to trace log */ |
| 380 | local void log_add(char *fmt, ...) |
| 381 | { |
| 382 | struct timeval now; |
| 383 | struct log *me; |
| 384 | va_list ap; |
| 385 | char msg[MAXMSG]; |
| 386 | |
| 387 | gettimeofday(&now, NULL); |
| 388 | me = malloc(sizeof(struct log)); |
| 389 | if (me == NULL) |
| 390 | bail("not enough memory", ""); |
| 391 | me->when = now; |
| 392 | va_start(ap, fmt); |
| 393 | vsnprintf(msg, MAXMSG, fmt, ap); |
| 394 | va_end(ap); |
| 395 | me->msg = malloc(strlen(msg) + 1); |
| 396 | if (me->msg == NULL) { |
| 397 | free(me); |
| 398 | bail("not enough memory", ""); |
| 399 | } |
| 400 | strcpy(me->msg, msg); |
| 401 | me->next = NULL; |
| 402 | #ifndef NOTHREAD |
| 403 | assert(log_lock != NULL); |
| 404 | possess(log_lock); |
| 405 | #endif |
| 406 | *log_tail = me; |
| 407 | log_tail = &(me->next); |
| 408 | #ifndef NOTHREAD |
| 409 | twist(log_lock, BY, +1); |
| 410 | #endif |
| 411 | } |
| 412 | |
| 413 | /* pull entry from trace log and print it, return false if empty */ |
| 414 | local int log_show(void) |
| 415 | { |
| 416 | struct log *me; |
| 417 | struct timeval diff; |
| 418 | |
| 419 | if (log_tail == NULL) |
| 420 | return 0; |
| 421 | #ifndef NOTHREAD |
| 422 | possess(log_lock); |
| 423 | #endif |
| 424 | me = log_head; |
| 425 | if (me == NULL) { |
| 426 | #ifndef NOTHREAD |
| 427 | release(log_lock); |
| 428 | #endif |
| 429 | return 0; |
| 430 | } |
| 431 | log_head = me->next; |
| 432 | if (me->next == NULL) |
| 433 | log_tail = &log_head; |
| 434 | #ifndef NOTHREAD |
| 435 | twist(log_lock, BY, -1); |
| 436 | #endif |
| 437 | diff.tv_usec = me->when.tv_usec - start.tv_usec; |
| 438 | diff.tv_sec = me->when.tv_sec - start.tv_sec; |
| 439 | if (diff.tv_usec < 0) { |
| 440 | diff.tv_usec += 1000000L; |
| 441 | diff.tv_sec--; |
| 442 | } |
| 443 | fprintf(stderr, "trace %ld.%06ld %s\n", |
| 444 | (long)diff.tv_sec, (long)diff.tv_usec, me->msg); |
| 445 | fflush(stderr); |
| 446 | free(me->msg); |
| 447 | free(me); |
| 448 | return 1; |
| 449 | } |
| 450 | |
| 451 | /* release log resources (need to do log_init() to use again) */ |
| 452 | local void log_free(void) |
| 453 | { |
| 454 | struct log *me; |
| 455 | |
| 456 | if (log_tail != NULL) { |
| 457 | #ifndef NOTHREAD |
| 458 | possess(log_lock); |
| 459 | #endif |
| 460 | while ((me = log_head) != NULL) { |
| 461 | log_head = me->next; |
| 462 | free(me->msg); |
| 463 | free(me); |
| 464 | } |
| 465 | #ifndef NOTHREAD |
| 466 | twist(log_lock, TO, 0); |
| 467 | free_lock(log_lock); |
| 468 | log_lock = NULL; |
| 469 | #endif |
| 470 | log_tail = NULL; |
| 471 | } |
| 472 | } |
| 473 | |
| 474 | /* show entries until no more, free log */ |
| 475 | local void log_dump(void) |
| 476 | { |
| 477 | if (log_tail == NULL) |
| 478 | return; |
| 479 | while (log_show()) |
| 480 | ; |
| 481 | log_free(); |
| 482 | } |
| 483 | |
| 484 | /* debugging macro */ |
| 485 | #define Trace(x) \ |
| 486 | do { \ |
| 487 | if (verbosity > 2) { \ |
| 488 | log_add x; \ |
| 489 | } \ |
| 490 | } while (0) |
| 491 | |
| 492 | #else /* !DEBUG */ |
| 493 | |
| 494 | #define log_dump() |
| 495 | #define Trace(x) |
| 496 | |
| 497 | #endif |
| 498 | |
| 499 | /* read up to len bytes into buf, repeating read() calls as needed */ |
| 500 | local size_t readn(int desc, unsigned char *buf, size_t len) |
| 501 | { |
| 502 | ssize_t ret; |
| 503 | size_t got; |
| 504 | |
| 505 | got = 0; |
| 506 | while (len) { |
| 507 | ret = read(desc, buf, len); |
| 508 | if (ret < 0) |
| 509 | bail("read error on ", in); |
| 510 | if (ret == 0) |
| 511 | break; |
| 512 | buf += ret; |
| 513 | len -= ret; |
| 514 | got += ret; |
| 515 | } |
| 516 | return got; |
| 517 | } |
| 518 | |
| 519 | /* write len bytes, repeating write() calls as needed */ |
| 520 | local void writen(int desc, unsigned char *buf, size_t len) |
| 521 | { |
| 522 | ssize_t ret; |
| 523 | |
| 524 | while (len) { |
| 525 | ret = write(desc, buf, len); |
| 526 | if (ret < 1) |
| 527 | fprintf(stderr, "write error code %d\n", errno); |
| 528 | if (ret < 1) |
| 529 | bail("write error on ", out); |
| 530 | buf += ret; |
| 531 | len -= ret; |
| 532 | } |
| 533 | } |
| 534 | |
| 535 | /* sliding dictionary size for deflate */ |
| 536 | #define DICT 32768U |
| 537 | |
| 538 | /* largest power of 2 that fits in an unsigned int -- used to limit requests |
| 539 | to zlib functions that use unsigned int lengths */ |
| 540 | #define MAX ((((unsigned)0 - 1) >> 1) + 1) |
| 541 | |
| 542 | /* convert Unix time to MS-DOS date and time, assuming current timezone |
| 543 | (you got a better idea?) */ |
| 544 | local unsigned long time2dos(time_t t) |
| 545 | { |
| 546 | struct tm *tm; |
| 547 | unsigned long dos; |
| 548 | |
| 549 | if (t == 0) |
| 550 | t = time(NULL); |
| 551 | tm = localtime(&t); |
| 552 | if (tm->tm_year < 80 || tm->tm_year > 207) |
| 553 | return 0; |
| 554 | dos = (tm->tm_year - 80) << 25; |
| 555 | dos += (tm->tm_mon + 1) << 21; |
| 556 | dos += tm->tm_mday << 16; |
| 557 | dos += tm->tm_hour << 11; |
| 558 | dos += tm->tm_min << 5; |
| 559 | dos += (tm->tm_sec + 1) >> 1; /* round to double-seconds */ |
| 560 | return dos; |
| 561 | } |
| 562 | |
| 563 | /* put a 4-byte integer into a byte array in LSB order or MSB order */ |
| 564 | #define PUT2L(a,b) (*(a)=(b)&0xff,(a)[1]=(b)>>8) |
| 565 | #define PUT4L(a,b) (PUT2L(a,(b)&0xffff),PUT2L((a)+2,(b)>>16)) |
| 566 | #define PUT4M(a,b) (*(a)=(b)>>24,(a)[1]=(b)>>16,(a)[2]=(b)>>8,(a)[3]=(b)) |
| 567 | |
| 568 | /* write a gzip, zlib, or zip header using the information in the globals */ |
| 569 | local unsigned long put_header(void) |
| 570 | { |
| 571 | unsigned long len; |
| 572 | unsigned char head[30]; |
| 573 | |
| 574 | if (form > 1) { /* zip */ |
| 575 | /* write local header */ |
| 576 | PUT4L(head, 0x04034b50UL); /* local header signature */ |
| 577 | PUT2L(head + 4, 20); /* version needed to extract (2.0) */ |
| 578 | PUT2L(head + 6, 8); /* flags: data descriptor follows data */ |
| 579 | PUT2L(head + 8, 8); /* deflate */ |
| 580 | PUT4L(head + 10, time2dos(mtime)); |
| 581 | PUT4L(head + 14, 0); /* crc (not here) */ |
| 582 | PUT4L(head + 18, 0); /* compressed length (not here) */ |
| 583 | PUT4L(head + 22, 0); /* uncompressed length (not here) */ |
| 584 | PUT2L(head + 26, name == NULL ? 1 : strlen(name)); /* name length */ |
| 585 | PUT2L(head + 28, 9); /* length of extra field (see below) */ |
| 586 | writen(outd, head, 30); /* write local header */ |
| 587 | len = 30; |
| 588 | |
| 589 | /* write file name (use "-" for stdin) */ |
| 590 | if (name == NULL) |
| 591 | writen(outd, (unsigned char *)"-", 1); |
| 592 | else |
| 593 | writen(outd, (unsigned char *)name, strlen(name)); |
| 594 | len += name == NULL ? 1 : strlen(name); |
| 595 | |
| 596 | /* write extended timestamp extra field block (9 bytes) */ |
| 597 | PUT2L(head, 0x5455); /* extended timestamp signature */ |
| 598 | PUT2L(head + 2, 5); /* number of data bytes in this block */ |
| 599 | head[4] = 1; /* flag presence of mod time */ |
| 600 | PUT4L(head + 5, mtime); /* mod time */ |
| 601 | writen(outd, head, 9); /* write extra field block */ |
| 602 | len += 9; |
| 603 | } |
| 604 | else if (form) { /* zlib */ |
| 605 | head[0] = 0x78; /* deflate, 32K window */ |
| 606 | head[1] = (level == 9 ? 3 : (level == 1 ? 0 : |
| 607 | (level >= 6 || level == Z_DEFAULT_COMPRESSION ? 1 : 2))) << 6; |
| 608 | head[1] += 31 - (((head[0] << 8) + head[1]) % 31); |
| 609 | writen(outd, head, 2); |
| 610 | len = 2; |
| 611 | } |
| 612 | else { /* gzip */ |
| 613 | head[0] = 31; |
| 614 | head[1] = 139; |
| 615 | head[2] = 8; /* deflate */ |
| 616 | head[3] = name != NULL ? 8 : 0; |
| 617 | PUT4L(head + 4, mtime); |
| 618 | head[8] = level == 9 ? 2 : (level == 1 ? 4 : 0); |
| 619 | head[9] = 3; /* unix */ |
| 620 | writen(outd, head, 10); |
| 621 | len = 10; |
| 622 | if (name != NULL) |
| 623 | writen(outd, (unsigned char *)name, strlen(name) + 1); |
| 624 | if (name != NULL) |
| 625 | len += strlen(name) + 1; |
| 626 | } |
| 627 | return len; |
| 628 | } |
| 629 | |
| 630 | /* write a gzip, zlib, or zip trailer */ |
| 631 | local void put_trailer(unsigned long ulen, unsigned long clen, |
| 632 | unsigned long check, unsigned long head) |
| 633 | { |
| 634 | unsigned char tail[46]; |
| 635 | |
| 636 | if (form > 1) { /* zip */ |
| 637 | unsigned long cent; |
| 638 | |
| 639 | /* write data descriptor (as promised in local header) */ |
| 640 | PUT4L(tail, check); |
| 641 | PUT4L(tail + 4, clen); |
| 642 | PUT4L(tail + 8, ulen); |
| 643 | writen(outd, tail, 12); |
| 644 | |
| 645 | /* write central file header */ |
| 646 | PUT4L(tail, 0x02014b50UL); /* central header signature */ |
| 647 | tail[4] = 63; /* obeyed version 6.3 of the zip spec */ |
| 648 | tail[5] = 255; /* ignore external attributes */ |
| 649 | PUT2L(tail + 6, 20); /* version needed to extract (2.0) */ |
| 650 | PUT2L(tail + 8, 8); /* data descriptor is present */ |
| 651 | PUT2L(tail + 10, 8); /* deflate */ |
| 652 | PUT4L(tail + 12, time2dos(mtime)); |
| 653 | PUT4L(tail + 16, check); /* crc */ |
| 654 | PUT4L(tail + 20, clen); /* compressed length */ |
| 655 | PUT4L(tail + 24, ulen); /* uncompressed length */ |
| 656 | PUT2L(tail + 28, name == NULL ? 1 : strlen(name)); /* name length */ |
| 657 | PUT2L(tail + 30, 9); /* length of extra field (see below) */ |
| 658 | PUT2L(tail + 32, 0); /* no file comment */ |
| 659 | PUT2L(tail + 34, 0); /* disk number 0 */ |
| 660 | PUT2L(tail + 36, 0); /* internal file attributes */ |
| 661 | PUT4L(tail + 38, 0); /* external file attributes (ignored) */ |
| 662 | PUT4L(tail + 42, 0); /* offset of local header */ |
| 663 | writen(outd, tail, 46); /* write central file header */ |
| 664 | cent = 46; |
| 665 | |
| 666 | /* write file name (use "-" for stdin) */ |
| 667 | if (name == NULL) |
| 668 | writen(outd, (unsigned char *)"-", 1); |
| 669 | else |
| 670 | writen(outd, (unsigned char *)name, strlen(name)); |
| 671 | cent += name == NULL ? 1 : strlen(name); |
| 672 | |
| 673 | /* write extended timestamp extra field block (9 bytes) */ |
| 674 | PUT2L(tail, 0x5455); /* extended timestamp signature */ |
| 675 | PUT2L(tail + 2, 5); /* number of data bytes in this block */ |
| 676 | tail[4] = 1; /* flag presence of mod time */ |
| 677 | PUT4L(tail + 5, mtime); /* mod time */ |
| 678 | writen(outd, tail, 9); /* write extra field block */ |
| 679 | cent += 9; |
| 680 | |
| 681 | /* write end of central directory record */ |
| 682 | PUT4L(tail, 0x06054b50UL); /* end of central directory signature */ |
| 683 | PUT2L(tail + 4, 0); /* number of this disk */ |
| 684 | PUT2L(tail + 6, 0); /* disk with start of central directory */ |
| 685 | PUT2L(tail + 8, 1); /* number of entries on this disk */ |
| 686 | PUT2L(tail + 10, 1); /* total number of entries */ |
| 687 | PUT4L(tail + 12, cent); /* size of central directory */ |
| 688 | PUT4L(tail + 16, head + clen + 12); /* offset of central directory */ |
| 689 | PUT2L(tail + 20, 0); /* no zip file comment */ |
| 690 | writen(outd, tail, 22); /* write end of central directory record */ |
| 691 | } |
| 692 | else if (form) { /* zlib */ |
| 693 | PUT4M(tail, check); |
| 694 | writen(outd, tail, 4); |
| 695 | } |
| 696 | else { /* gzip */ |
| 697 | PUT4L(tail, check); |
| 698 | PUT4L(tail + 4, ulen); |
| 699 | writen(outd, tail, 8); |
| 700 | } |
| 701 | } |
| 702 | |
| 703 | /* compute check value depending on format */ |
| 704 | #define CHECK(a,b,c) (form == 1 ? adler32(a,b,c) : crc32(a,b,c)) |
| 705 | |
| 706 | #ifndef NOTHREAD |
| 707 | /* -- threaded portions of pigz -- */ |
| 708 | |
| 709 | /* -- check value combination routines for parallel calculation -- */ |
| 710 | |
| 711 | #define COMB(a,b,c) (form == 1 ? adler32_comb(a,b,c) : crc32_comb(a,b,c)) |
| 712 | /* combine two crc-32's or two adler-32's (copied from zlib 1.2.3 so that pigz |
| 713 | can be compatible with older versions of zlib) */ |
| 714 | |
| 715 | /* we copy the combination routines from zlib here, in order to avoid |
| 716 | linkage issues with the zlib 1.2.3 builds on Sun, Ubuntu, and others */ |
| 717 | |
| 718 | local unsigned long gf2_matrix_times(unsigned long *mat, unsigned long vec) |
| 719 | { |
| 720 | unsigned long sum; |
| 721 | |
| 722 | sum = 0; |
| 723 | while (vec) { |
| 724 | if (vec & 1) |
| 725 | sum ^= *mat; |
| 726 | vec >>= 1; |
| 727 | mat++; |
| 728 | } |
| 729 | return sum; |
| 730 | } |
| 731 | |
| 732 | local void gf2_matrix_square(unsigned long *square, unsigned long *mat) |
| 733 | { |
| 734 | int n; |
| 735 | |
| 736 | for (n = 0; n < 32; n++) |
| 737 | square[n] = gf2_matrix_times(mat, mat[n]); |
| 738 | } |
| 739 | |
| 740 | local unsigned long crc32_comb(unsigned long crc1, unsigned long crc2, |
| 741 | size_t len2) |
| 742 | { |
| 743 | int n; |
| 744 | unsigned long row; |
| 745 | unsigned long even[32]; /* even-power-of-two zeros operator */ |
| 746 | unsigned long odd[32]; /* odd-power-of-two zeros operator */ |
| 747 | |
| 748 | /* degenerate case */ |
| 749 | if (len2 == 0) |
| 750 | return crc1; |
| 751 | |
| 752 | /* put operator for one zero bit in odd */ |
| 753 | odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ |
| 754 | row = 1; |
| 755 | for (n = 1; n < 32; n++) { |
| 756 | odd[n] = row; |
| 757 | row <<= 1; |
| 758 | } |
| 759 | |
| 760 | /* put operator for two zero bits in even */ |
| 761 | gf2_matrix_square(even, odd); |
| 762 | |
| 763 | /* put operator for four zero bits in odd */ |
| 764 | gf2_matrix_square(odd, even); |
| 765 | |
| 766 | /* apply len2 zeros to crc1 (first square will put the operator for one |
| 767 | zero byte, eight zero bits, in even) */ |
| 768 | do { |
| 769 | /* apply zeros operator for this bit of len2 */ |
| 770 | gf2_matrix_square(even, odd); |
| 771 | if (len2 & 1) |
| 772 | crc1 = gf2_matrix_times(even, crc1); |
| 773 | len2 >>= 1; |
| 774 | |
| 775 | /* if no more bits set, then done */ |
| 776 | if (len2 == 0) |
| 777 | break; |
| 778 | |
| 779 | /* another iteration of the loop with odd and even swapped */ |
| 780 | gf2_matrix_square(odd, even); |
| 781 | if (len2 & 1) |
| 782 | crc1 = gf2_matrix_times(odd, crc1); |
| 783 | len2 >>= 1; |
| 784 | |
| 785 | /* if no more bits set, then done */ |
| 786 | } while (len2 != 0); |
| 787 | |
| 788 | /* return combined crc */ |
| 789 | crc1 ^= crc2; |
| 790 | return crc1; |
| 791 | } |
| 792 | |
| 793 | #define BASE 65521U /* largest prime smaller than 65536 */ |
| 794 | #define LOW16 0xffff /* mask lower 16 bits */ |
| 795 | |
| 796 | local unsigned long adler32_comb(unsigned long adler1, unsigned long adler2, |
| 797 | size_t len2) |
| 798 | { |
| 799 | unsigned long sum1; |
| 800 | unsigned long sum2; |
| 801 | unsigned rem; |
| 802 | |
| 803 | /* the derivation of this formula is left as an exercise for the reader */ |
| 804 | rem = (unsigned)(len2 % BASE); |
| 805 | sum1 = adler1 & LOW16; |
| 806 | sum2 = (rem * sum1) % BASE; |
| 807 | sum1 += (adler2 & LOW16) + BASE - 1; |
| 808 | sum2 += ((adler1 >> 16) & LOW16) + ((adler2 >> 16) & LOW16) + BASE - rem; |
| 809 | if (sum1 >= BASE) sum1 -= BASE; |
| 810 | if (sum1 >= BASE) sum1 -= BASE; |
| 811 | if (sum2 >= (BASE << 1)) sum2 -= (BASE << 1); |
| 812 | if (sum2 >= BASE) sum2 -= BASE; |
| 813 | return sum1 | (sum2 << 16); |
| 814 | } |
| 815 | |
| 816 | /* -- pool of spaces for buffer management -- */ |
| 817 | |
| 818 | /* These routines manage a pool of spaces. Each pool specifies a fixed size |
| 819 | buffer to be contained in each space. Each space has a use count, which |
| 820 | when decremented to zero returns the space to the pool. If a space is |
| 821 | requested from the pool and the pool is empty, a space is immediately |
| 822 | created unless a specified limit on the number of spaces has been reached. |
| 823 | Only if the limit is reached will it wait for a space to be returned to the |
| 824 | pool. Each space knows what pool it belongs to, so that it can be returned. |
| 825 | */ |
| 826 | |
| 827 | /* a space (one buffer for each space) */ |
| 828 | struct space { |
| 829 | lock *use; /* use count -- return to pool when zero */ |
| 830 | void *buf; /* buffer of size pool->size */ |
| 831 | size_t len; /* for application usage */ |
| 832 | struct pool *pool; /* pool to return to */ |
| 833 | struct space *next; /* for pool linked list */ |
| 834 | }; |
| 835 | |
| 836 | /* pool of spaces (one pool for each size needed) */ |
| 837 | struct pool { |
| 838 | lock *have; /* unused spaces available, lock for list */ |
| 839 | struct space *head; /* linked list of available buffers */ |
| 840 | size_t size; /* size of all buffers in this pool */ |
| 841 | int limit; /* number of new spaces allowed, or -1 */ |
| 842 | int made; /* number of buffers made */ |
| 843 | }; |
| 844 | |
| 845 | /* initialize a pool (pool structure itself provided, not allocated) -- the |
| 846 | limit is the maximum number of spaces in the pool, or -1 to indicate no |
| 847 | limit, i.e., to never wait for a buffer to return to the pool */ |
| 848 | local void new_pool(struct pool *pool, size_t size, int limit) |
| 849 | { |
| 850 | pool->have = new_lock(0); |
| 851 | pool->head = NULL; |
| 852 | pool->size = size; |
| 853 | pool->limit = limit; |
| 854 | pool->made = 0; |
| 855 | } |
| 856 | |
| 857 | /* get a space from a pool -- the use count is initially set to one, so there |
| 858 | is no need to call use_space() for the first use */ |
| 859 | local struct space *get_space(struct pool *pool) |
| 860 | { |
| 861 | struct space *space; |
| 862 | |
| 863 | /* if can't create any more, wait for a space to show up */ |
| 864 | possess(pool->have); |
| 865 | if (pool->limit == 0) |
| 866 | wait_for(pool->have, NOT_TO_BE, 0); |
| 867 | |
| 868 | /* if a space is available, pull it from the list and return it */ |
| 869 | if (pool->head != NULL) { |
| 870 | space = pool->head; |
| 871 | possess(space->use); |
| 872 | pool->head = space->next; |
| 873 | twist(pool->have, BY, -1); /* one less in pool */ |
| 874 | twist(space->use, TO, 1); /* initially one user */ |
| 875 | return space; |
| 876 | } |
| 877 | |
| 878 | /* nothing available, don't want to wait, make a new space */ |
| 879 | assert(pool->limit != 0); |
| 880 | if (pool->limit > 0) |
| 881 | pool->limit--; |
| 882 | pool->made++; |
| 883 | release(pool->have); |
| 884 | space = malloc(sizeof(struct space)); |
| 885 | if (space == NULL) |
| 886 | bail("not enough memory", ""); |
| 887 | space->use = new_lock(1); /* initially one user */ |
| 888 | space->buf = malloc(pool->size); |
| 889 | if (space->buf == NULL) |
| 890 | bail("not enough memory", ""); |
| 891 | space->pool = pool; /* remember the pool this belongs to */ |
| 892 | return space; |
| 893 | } |
| 894 | |
| 895 | /* increment the use count to require one more drop before returning this space |
| 896 | to the pool */ |
| 897 | local void use_space(struct space *space) |
| 898 | { |
| 899 | possess(space->use); |
| 900 | twist(space->use, BY, +1); |
| 901 | } |
| 902 | |
| 903 | /* drop a space, returning it to the pool if the use count is zero */ |
| 904 | local void drop_space(struct space *space) |
| 905 | { |
| 906 | int use; |
| 907 | struct pool *pool; |
| 908 | |
| 909 | possess(space->use); |
| 910 | use = peek_lock(space->use); |
| 911 | assert(use != 0); |
| 912 | if (use == 1) { |
| 913 | pool = space->pool; |
| 914 | possess(pool->have); |
| 915 | space->next = pool->head; |
| 916 | pool->head = space; |
| 917 | twist(pool->have, BY, +1); |
| 918 | } |
| 919 | twist(space->use, BY, -1); |
| 920 | } |
| 921 | |
| 922 | /* free the memory and lock resources of a pool -- return number of spaces for |
| 923 | debugging and resource usage measurement */ |
| 924 | local int free_pool(struct pool *pool) |
| 925 | { |
| 926 | int count; |
| 927 | struct space *space; |
| 928 | |
| 929 | possess(pool->have); |
| 930 | count = 0; |
| 931 | while ((space = pool->head) != NULL) { |
| 932 | pool->head = space->next; |
| 933 | free(space->buf); |
| 934 | free_lock(space->use); |
| 935 | free(space); |
| 936 | count++; |
| 937 | } |
| 938 | release(pool->have); |
| 939 | free_lock(pool->have); |
| 940 | assert(count == pool->made); |
| 941 | return count; |
| 942 | } |
| 943 | |
| 944 | /* input and output buffer pools */ |
| 945 | local struct pool in_pool; |
| 946 | local struct pool out_pool; |
| 947 | |
| 948 | /* -- parallel compression -- */ |
| 949 | |
| 950 | /* compress or write job (passed from compress list to write list) -- if seq is |
| 951 | equal to -1, compress_thread is instructed to return; if more is false then |
| 952 | this is the last chunk, which after writing tells write_thread to return */ |
| 953 | struct job { |
| 954 | long seq; /* sequence number */ |
| 955 | int more; /* true if this is not the last chunk */ |
| 956 | struct space *in; /* input data to compress */ |
| 957 | struct space *out; /* dictionary or resulting compressed data */ |
| 958 | unsigned long check; /* check value for input data */ |
| 959 | lock *calc; /* released when check calculation complete */ |
| 960 | struct job *next; /* next job in the list (either list) */ |
| 961 | }; |
| 962 | |
| 963 | /* list of compress jobs (with tail for appending to list) */ |
| 964 | local lock *compress_have = NULL; /* number of compress jobs waiting */ |
| 965 | local struct job *compress_head, **compress_tail; |
| 966 | |
| 967 | /* list of write jobs */ |
| 968 | local lock *write_first; /* lowest sequence number in list */ |
| 969 | local struct job *write_head; |
| 970 | |
| 971 | /* number of compression threads running */ |
| 972 | local int cthreads = 0; |
| 973 | |
| 974 | /* write thread if running */ |
| 975 | local thread *writeth = NULL; |
| 976 | |
| 977 | /* setup job lists (call from main thread) */ |
| 978 | local void setup_jobs(void) |
| 979 | { |
| 980 | /* set up only if not already set up*/ |
| 981 | if (compress_have != NULL) |
| 982 | return; |
| 983 | |
| 984 | /* allocate locks and initialize lists */ |
| 985 | compress_have = new_lock(0); |
| 986 | compress_head = NULL; |
| 987 | compress_tail = &compress_head; |
| 988 | write_first = new_lock(-1); |
| 989 | write_head = NULL; |
| 990 | |
| 991 | /* initialize buffer pools */ |
| 992 | new_pool(&in_pool, size, (procs << 1) + 2); |
| 993 | new_pool(&out_pool, size + (size >> 11) + 10, -1); |
| 994 | } |
| 995 | |
| 996 | /* command the compress threads to all return, then join them all (call from |
| 997 | main thread), free all the thread-related resources */ |
| 998 | local void finish_jobs(void) |
| 999 | { |
| 1000 | struct job job; |
| 1001 | int caught; |
| 1002 | |
| 1003 | /* only do this once */ |
| 1004 | if (compress_have == NULL) |
| 1005 | return; |
| 1006 | |
| 1007 | /* command all of the extant compress threads to return */ |
| 1008 | possess(compress_have); |
| 1009 | job.seq = -1; |
| 1010 | job.next = NULL; |
| 1011 | compress_head = &job; |
| 1012 | compress_tail = &(job.next); |
| 1013 | twist(compress_have, BY, +1); /* will wake them all up */ |
| 1014 | |
| 1015 | /* join all of the compress threads, verify they all came back */ |
| 1016 | caught = join_all(); |
| 1017 | Trace(("-- joined %d compress threads", caught)); |
| 1018 | assert(caught == cthreads); |
| 1019 | cthreads = 0; |
| 1020 | |
| 1021 | /* free the resources */ |
| 1022 | caught = free_pool(&out_pool); |
| 1023 | Trace(("-- freed %d output buffers", caught)); |
| 1024 | caught = free_pool(&in_pool); |
| 1025 | Trace(("-- freed %d input buffers", caught)); |
| 1026 | free_lock(write_first); |
| 1027 | free_lock(compress_have); |
| 1028 | compress_have = NULL; |
| 1029 | } |
| 1030 | |
| 1031 | /* get the next compression job from the head of the list, compress and compute |
| 1032 | the check value on the input, and put a job in the write list with the |
| 1033 | results -- keep looking for more jobs, returning when a job is found with a |
| 1034 | sequence number of -1 (leave that job in the list for other incarnations to |
| 1035 | find) */ |
| 1036 | local void compress_thread(void *dummy) |
| 1037 | { |
| 1038 | struct job *job; /* job pulled and working on */ |
| 1039 | struct job *here, **prior; /* pointers for inserting in write list */ |
| 1040 | unsigned long check; /* check value of input */ |
| 1041 | unsigned char *next; /* pointer for check value data */ |
| 1042 | size_t len; /* remaining bytes to compress/check */ |
| 1043 | z_stream strm; /* deflate stream */ |
| 1044 | |
| 1045 | (void)dummy; |
| 1046 | |
| 1047 | /* initialize the deflate stream for this thread */ |
| 1048 | strm.zfree = Z_NULL; |
| 1049 | strm.zalloc = Z_NULL; |
| 1050 | strm.opaque = Z_NULL; |
| 1051 | if (deflateInit2(&strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != |
| 1052 | Z_OK) |
| 1053 | bail("not enough memory", ""); |
| 1054 | |
| 1055 | /* keep looking for work */ |
| 1056 | for (;;) { |
| 1057 | /* get a job (like I tell my son) */ |
| 1058 | possess(compress_have); |
| 1059 | wait_for(compress_have, NOT_TO_BE, 0); |
| 1060 | job = compress_head; |
| 1061 | assert(job != NULL); |
| 1062 | if (job->seq == -1) |
| 1063 | break; |
| 1064 | compress_head = job->next; |
| 1065 | if (job->next == NULL) |
| 1066 | compress_tail = &compress_head; |
| 1067 | twist(compress_have, BY, -1); |
| 1068 | |
| 1069 | /* got a job -- initialize and set the compression level (note that if |
| 1070 | deflateParams() is called immediately after deflateReset(), there is |
| 1071 | no need to initialize the input/output for the stream) */ |
| 1072 | Trace(("-- compressing #%ld", job->seq)); |
| 1073 | (void)deflateReset(&strm); |
| 1074 | (void)deflateParams(&strm, level, Z_DEFAULT_STRATEGY); |
| 1075 | |
| 1076 | /* set dictionary if provided, release that input buffer (only provided |
| 1077 | if dict is true and if this is not the first work unit) -- the |
| 1078 | amount of data in the buffer is assured to be >= DICT */ |
| 1079 | if (job->out != NULL) { |
| 1080 | len = job->out->len; |
| 1081 | deflateSetDictionary(&strm, |
| 1082 | (unsigned char *)(job->out->buf) + (len - DICT), DICT); |
| 1083 | drop_space(job->out); |
| 1084 | } |
| 1085 | |
| 1086 | /* set up input and output (the output size is assured to be big enough |
| 1087 | for the worst case expansion of the input buffer size, plus five |
| 1088 | bytes for the terminating stored block) */ |
| 1089 | job->out = get_space(&out_pool); |
| 1090 | strm.next_in = job->in->buf; |
| 1091 | strm.next_out = job->out->buf; |
| 1092 | |
| 1093 | /* run MAX-sized amounts of input through deflate -- this loop is |
| 1094 | needed for those cases where the integer type is smaller than the |
| 1095 | size_t type, or when len is close to the limit of the size_t type */ |
| 1096 | len = job->in->len; |
| 1097 | while (len > MAX) { |
| 1098 | strm.avail_in = MAX; |
| 1099 | strm.avail_out = (unsigned)-1; |
| 1100 | (void)deflate(&strm, Z_NO_FLUSH); |
| 1101 | assert(strm.avail_in == 0 && strm.avail_out != 0); |
| 1102 | len -= MAX; |
| 1103 | } |
| 1104 | |
| 1105 | /* run the last piece through deflate -- terminate with a sync marker, |
| 1106 | or finish deflate stream if this is the last block */ |
| 1107 | strm.avail_in = (unsigned)len; |
| 1108 | strm.avail_out = (unsigned)-1; |
| 1109 | (void)deflate(&strm, job->more ? Z_SYNC_FLUSH : Z_FINISH); |
| 1110 | assert(strm.avail_in == 0 && strm.avail_out != 0); |
| 1111 | job->out->len = strm.next_out - (unsigned char *)(job->out->buf); |
| 1112 | Trace(("-- compressed #%ld%s", job->seq, job->more ? "" : " (last)")); |
| 1113 | |
| 1114 | /* reserve input buffer until check value has been calculated */ |
| 1115 | use_space(job->in); |
| 1116 | |
| 1117 | /* insert write job in list in sorted order, alert write thread */ |
| 1118 | possess(write_first); |
| 1119 | prior = &write_head; |
| 1120 | while ((here = *prior) != NULL) { |
| 1121 | if (here->seq > job->seq) |
| 1122 | break; |
| 1123 | prior = &(here->next); |
| 1124 | } |
| 1125 | job->next = here; |
| 1126 | *prior = job; |
| 1127 | twist(write_first, TO, write_head->seq); |
| 1128 | |
| 1129 | /* calculate the check value in parallel with writing, alert the write |
| 1130 | thread that the calculation is complete, and drop this usage of the |
| 1131 | input buffer */ |
| 1132 | len = job->in->len; |
| 1133 | next = job->in->buf; |
| 1134 | check = CHECK(0L, Z_NULL, 0); |
| 1135 | while (len > MAX) { |
| 1136 | check = CHECK(check, next, MAX); |
| 1137 | len -= MAX; |
| 1138 | next += MAX; |
| 1139 | } |
| 1140 | check = CHECK(check, next, (unsigned)len); |
| 1141 | drop_space(job->in); |
| 1142 | job->check = check; |
| 1143 | possess(job->calc); |
| 1144 | twist(job->calc, TO, 1); |
| 1145 | Trace(("-- checked #%ld%s", job->seq, job->more ? "" : " (last)")); |
| 1146 | |
| 1147 | /* done with that one -- go find another job */ |
| 1148 | } |
| 1149 | |
| 1150 | /* found job with seq == -1 -- free deflate memory and return to join */ |
| 1151 | release(compress_have); |
| 1152 | deflateEnd(&strm); |
| 1153 | } |
| 1154 | |
| 1155 | /* collect the write jobs off of the list in sequence order and write out the |
| 1156 | compressed data until the last chunk is written -- also write the header and |
| 1157 | trailer and combine the individual check values of the input buffers */ |
| 1158 | local void write_thread(void *dummy) |
| 1159 | { |
| 1160 | long seq; /* next sequence number looking for */ |
| 1161 | struct job *job; /* job pulled and working on */ |
| 1162 | size_t len; /* input length */ |
| 1163 | int more; /* true if more chunks to write */ |
| 1164 | unsigned long head; /* header length */ |
| 1165 | unsigned long ulen; /* total uncompressed size (overflow ok) */ |
| 1166 | unsigned long clen; /* total compressed size (overflow ok) */ |
| 1167 | unsigned long check; /* check value of uncompressed data */ |
| 1168 | |
| 1169 | (void)dummy; |
| 1170 | |
| 1171 | /* build and write header */ |
| 1172 | Trace(("-- write thread running")); |
| 1173 | head = put_header(); |
| 1174 | |
| 1175 | /* process output of compress threads until end of input */ |
| 1176 | ulen = clen = 0; |
| 1177 | check = CHECK(0L, Z_NULL, 0); |
| 1178 | seq = 0; |
| 1179 | do { |
| 1180 | /* get next write job in order */ |
| 1181 | possess(write_first); |
| 1182 | wait_for(write_first, TO_BE, seq); |
| 1183 | job = write_head; |
| 1184 | write_head = job->next; |
| 1185 | twist(write_first, TO, write_head == NULL ? -1 : write_head->seq); |
| 1186 | |
| 1187 | /* update lengths, save uncompressed length for COMB */ |
| 1188 | more = job->more; |
| 1189 | len = job->in->len; |
| 1190 | drop_space(job->in); |
| 1191 | ulen += (unsigned long)len; |
| 1192 | clen += (unsigned long)(job->out->len); |
| 1193 | |
| 1194 | /* write the compressed data and drop the output buffer */ |
| 1195 | Trace(("-- writing #%ld", seq)); |
| 1196 | writen(outd, job->out->buf, job->out->len); |
| 1197 | drop_space(job->out); |
| 1198 | Trace(("-- wrote #%ld%s", seq, more ? "" : " (last)")); |
| 1199 | |
| 1200 | /* wait for check calculation to complete, then combine, once |
| 1201 | the compress thread is done with the input, release it */ |
| 1202 | possess(job->calc); |
| 1203 | wait_for(job->calc, TO_BE, 1); |
| 1204 | release(job->calc); |
| 1205 | check = COMB(check, job->check, len); |
| 1206 | |
| 1207 | /* free the job */ |
| 1208 | free_lock(job->calc); |
| 1209 | free(job); |
| 1210 | |
| 1211 | /* get the next buffer in sequence */ |
| 1212 | seq++; |
| 1213 | } while (more); |
| 1214 | |
| 1215 | /* write trailer */ |
| 1216 | put_trailer(ulen, clen, check, head); |
| 1217 | |
| 1218 | /* verify no more jobs, prepare for next use */ |
| 1219 | possess(compress_have); |
| 1220 | assert(compress_head == NULL && peek_lock(compress_have) == 0); |
| 1221 | release(compress_have); |
| 1222 | possess(write_first); |
| 1223 | assert(write_head == NULL); |
| 1224 | twist(write_first, TO, -1); |
| 1225 | } |
| 1226 | |
| 1227 | /* compress ind to outd, using multiple threads for the compression and check |
| 1228 | value calculations and one other thread for writing the output -- compress |
| 1229 | threads will be launched and left running (waiting actually) to support |
| 1230 | subsequent calls of parallel_compress() */ |
| 1231 | local void parallel_compress(void) |
| 1232 | { |
| 1233 | long seq; /* sequence number */ |
| 1234 | struct space *prev; /* previous input space */ |
| 1235 | struct space *next; /* next input space */ |
| 1236 | struct job *job; /* job for compress, then write */ |
| 1237 | int more; /* true if more input to read */ |
| 1238 | |
| 1239 | /* if first time or after an option change, setup the job lists */ |
| 1240 | setup_jobs(); |
| 1241 | |
| 1242 | /* start write thread */ |
| 1243 | writeth = launch(write_thread, NULL); |
| 1244 | |
| 1245 | /* read from input and start compress threads (write thread will pick up |
| 1246 | the output of the compress threads) */ |
| 1247 | seq = 0; |
| 1248 | prev = NULL; |
| 1249 | next = get_space(&in_pool); |
| 1250 | next->len = readn(ind, next->buf, next->pool->size); |
| 1251 | do { |
| 1252 | /* create a new job, use next input chunk, previous as dictionary */ |
| 1253 | job = malloc(sizeof(struct job)); |
| 1254 | if (job == NULL) |
| 1255 | bail("not enough memory", ""); |
| 1256 | job->seq = seq; |
| 1257 | job->in = next; |
| 1258 | job->out = dict ? prev : NULL; /* dictionary for compression */ |
| 1259 | job->calc = new_lock(0); |
| 1260 | |
| 1261 | /* check for end of file, reading next chunk if not sure */ |
| 1262 | if (next->len < next->pool->size) |
| 1263 | more = 0; |
| 1264 | else { |
| 1265 | next = get_space(&in_pool); |
| 1266 | next->len = readn(ind, next->buf, next->pool->size); |
| 1267 | more = next->len != 0; |
| 1268 | if (!more) |
| 1269 | drop_space(next); /* won't be using it */ |
| 1270 | if (dict && more) { |
| 1271 | use_space(job->in); /* hold as dictionary for next loop */ |
| 1272 | prev = job->in; |
| 1273 | } |
| 1274 | } |
| 1275 | job->more = more; |
| 1276 | Trace(("-- read #%ld%s", seq, more ? "" : " (last)")); |
| 1277 | if (++seq < 1) |
| 1278 | bail("input too long: ", in); |
| 1279 | |
| 1280 | /* start another compress thread if needed */ |
| 1281 | if (cthreads < seq && cthreads < procs) { |
| 1282 | (void)launch(compress_thread, NULL); |
| 1283 | cthreads++; |
| 1284 | } |
| 1285 | |
| 1286 | /* put job at end of compress list, let all the compressors know */ |
| 1287 | possess(compress_have); |
| 1288 | job->next = NULL; |
| 1289 | *compress_tail = job; |
| 1290 | compress_tail = &(job->next); |
| 1291 | twist(compress_have, BY, +1); |
| 1292 | |
| 1293 | /* do until end of input */ |
| 1294 | } while (more); |
| 1295 | |
| 1296 | /* wait for the write thread to complete (we leave the compress threads out |
| 1297 | there and waiting in case there is another stream to compress) */ |
| 1298 | join(writeth); |
| 1299 | writeth = NULL; |
| 1300 | Trace(("-- write thread joined")); |
| 1301 | } |
| 1302 | |
| 1303 | #endif |
| 1304 | |
| 1305 | /* do a simple compression in a single thread from ind to outd -- if reset is |
| 1306 | true, instead free the memory that was allocated and retained for input, |
| 1307 | output, and deflate */ |
| 1308 | local void single_compress(int reset) |
| 1309 | { |
| 1310 | size_t got; /* amount read */ |
| 1311 | size_t more; /* amount of next read (0 if eof) */ |
| 1312 | unsigned long head; /* header length */ |
| 1313 | unsigned long ulen; /* total uncompressed size (overflow ok) */ |
| 1314 | unsigned long clen; /* total compressed size (overflow ok) */ |
| 1315 | unsigned long check; /* check value of uncompressed data */ |
| 1316 | static unsigned out_size; /* size of output buffer */ |
| 1317 | static unsigned char *in, *next, *out; /* reused i/o buffers */ |
| 1318 | static z_stream *strm = NULL; /* reused deflate structure */ |
| 1319 | |
| 1320 | /* if requested, just release the allocations and return */ |
| 1321 | if (reset) { |
| 1322 | if (strm != NULL) { |
| 1323 | deflateEnd(strm); |
| 1324 | free(strm); |
| 1325 | free(out); |
| 1326 | free(next); |
| 1327 | free(in); |
| 1328 | strm = NULL; |
| 1329 | } |
| 1330 | return; |
| 1331 | } |
| 1332 | |
| 1333 | /* initialize the deflate structure if this is the first time */ |
| 1334 | if (strm == NULL) { |
| 1335 | out_size = size > MAX ? MAX : (unsigned)size; |
| 1336 | if ((in = malloc(size)) == NULL || |
| 1337 | (next = malloc(size)) == NULL || |
| 1338 | (out = malloc(out_size)) == NULL || |
| 1339 | (strm = malloc(sizeof(z_stream))) == NULL) |
| 1340 | bail("not enough memory", ""); |
| 1341 | strm->zfree = Z_NULL; |
| 1342 | strm->zalloc = Z_NULL; |
| 1343 | strm->opaque = Z_NULL; |
| 1344 | if (deflateInit2(strm, level, Z_DEFLATED, -15, 8, |
| 1345 | Z_DEFAULT_STRATEGY) != Z_OK) |
| 1346 | bail("not enough memory", ""); |
| 1347 | } |
| 1348 | |
| 1349 | /* write header */ |
| 1350 | head = put_header(); |
| 1351 | |
| 1352 | /* set compression level in case it changed */ |
| 1353 | (void)deflateReset(strm); |
| 1354 | (void)deflateParams(strm, level, Z_DEFAULT_STRATEGY); |
| 1355 | |
| 1356 | /* do raw deflate and calculate check value */ |
| 1357 | ulen = clen = 0; |
| 1358 | check = CHECK(0L, Z_NULL, 0); |
| 1359 | more = readn(ind, next, size); |
| 1360 | do { |
| 1361 | /* get data to compress, see if there is any more input */ |
| 1362 | got = more; |
| 1363 | { unsigned char *temp; temp = in; in = next; next = temp; } |
| 1364 | more = got < size ? 0 : readn(ind, next, size); |
| 1365 | ulen += (unsigned long)got; |
| 1366 | strm->next_in = in; |
| 1367 | |
| 1368 | /* compress MAX-size chunks in case unsigned type is small */ |
| 1369 | while (got > MAX) { |
| 1370 | strm->avail_in = MAX; |
| 1371 | check = CHECK(check, strm->next_in, strm->avail_in); |
| 1372 | do { |
| 1373 | strm->avail_out = out_size; |
| 1374 | strm->next_out = out; |
| 1375 | (void)deflate(strm, Z_NO_FLUSH); |
| 1376 | writen(outd, out, out_size - strm->avail_out); |
| 1377 | clen += out_size - strm->avail_out; |
| 1378 | } while (strm->avail_out == 0); |
| 1379 | assert(strm->avail_in == 0); |
| 1380 | got -= MAX; |
| 1381 | } |
| 1382 | |
| 1383 | /* compress the remainder, finishing if end of input -- when not -i, |
| 1384 | use a Z_SYNC_FLUSH so that this and parallel compression produce the |
| 1385 | same output */ |
| 1386 | strm->avail_in = (unsigned)got; |
| 1387 | check = CHECK(check, strm->next_in, strm->avail_in); |
| 1388 | do { |
| 1389 | strm->avail_out = out_size; |
| 1390 | strm->next_out = out; |
| 1391 | (void)deflate(strm, |
| 1392 | more ? (dict ? Z_SYNC_FLUSH : Z_FULL_FLUSH) : Z_FINISH); |
| 1393 | writen(outd, out, out_size - strm->avail_out); |
| 1394 | clen += out_size - strm->avail_out; |
| 1395 | } while (strm->avail_out == 0); |
| 1396 | assert(strm->avail_in == 0); |
| 1397 | |
| 1398 | /* do until no more input */ |
| 1399 | } while (more); |
| 1400 | |
| 1401 | /* write trailer */ |
| 1402 | put_trailer(ulen, clen, check, head); |
| 1403 | } |
| 1404 | |
| 1405 | /* --- decompression --- */ |
| 1406 | |
| 1407 | /* globals for decompression and listing buffered reading */ |
| 1408 | #define BUF 32768U /* input buffer size */ |
| 1409 | local unsigned char in_buf[BUF]; /* input buffer */ |
| 1410 | local unsigned char *in_next; /* next unused byte in buffer */ |
| 1411 | local size_t in_left; /* number of unused bytes in buffer */ |
| 1412 | local int in_eof; /* true if reached end of file on input */ |
| 1413 | local int in_short; /* true if last read didn't fill buffer */ |
| 1414 | local off_t in_tot; /* total bytes read from input */ |
| 1415 | local off_t out_tot; /* total bytes written to output */ |
| 1416 | local unsigned long out_check; /* check value of output */ |
| 1417 | |
| 1418 | #ifndef NOTHREAD |
| 1419 | /* parallel reading */ |
| 1420 | |
| 1421 | local unsigned char in_buf2[BUF]; /* second buffer for parallel reads */ |
| 1422 | local size_t in_len; /* data waiting in next buffer */ |
| 1423 | local int in_which; /* -1: start, 0: in_buf2, 1: in_buf */ |
| 1424 | local lock *load_state; /* value = 0 to wait, 1 to read a buffer */ |
| 1425 | local thread *load_thread; /* load_read() thread for joining */ |
| 1426 | |
| 1427 | /* parallel read thread */ |
| 1428 | local void load_read(void *dummy) |
| 1429 | { |
| 1430 | size_t len; |
| 1431 | |
| 1432 | (void)dummy; |
| 1433 | |
| 1434 | Trace(("-- launched decompress read thread")); |
| 1435 | do { |
| 1436 | possess(load_state); |
| 1437 | wait_for(load_state, TO_BE, 1); |
| 1438 | in_len = len = readn(ind, in_which ? in_buf : in_buf2, BUF); |
| 1439 | Trace(("-- decompress read thread read %lu bytes", len)); |
| 1440 | twist(load_state, TO, 0); |
| 1441 | } while (len == BUF); |
| 1442 | Trace(("-- exited decompress read thread")); |
| 1443 | } |
| 1444 | |
| 1445 | #endif |
| 1446 | |
| 1447 | /* load() is called when in_left has gone to zero in order to provide more |
| 1448 | input data: load the input buffer with BUF or less bytes (less if at end of |
| 1449 | file) from the file ind, set in_next to point to the in_left bytes read, |
| 1450 | update in_tot, and return in_left -- in_eof is set to true when in_left has |
| 1451 | gone to zero and there is no more data left to read from ind */ |
| 1452 | local size_t load(void) |
| 1453 | { |
| 1454 | /* if already detected end of file, do nothing */ |
| 1455 | if (in_short) { |
| 1456 | in_eof = 1; |
| 1457 | return 0; |
| 1458 | } |
| 1459 | |
| 1460 | #ifndef NOTHREAD |
| 1461 | /* if first time in or procs == 1, read a buffer to have something to |
| 1462 | return, otherwise wait for the previous read job to complete */ |
| 1463 | if (procs > 1) { |
| 1464 | /* if first time, fire up the read thread, ask for a read */ |
| 1465 | if (in_which == -1) { |
| 1466 | in_which = 1; |
| 1467 | load_state = new_lock(1); |
| 1468 | load_thread = launch(load_read, NULL); |
| 1469 | } |
| 1470 | |
| 1471 | /* wait for the previously requested read to complete */ |
| 1472 | possess(load_state); |
| 1473 | wait_for(load_state, TO_BE, 0); |
| 1474 | release(load_state); |
| 1475 | |
| 1476 | /* set up input buffer with the data just read */ |
| 1477 | in_next = in_which ? in_buf : in_buf2; |
| 1478 | in_left = in_len; |
| 1479 | |
| 1480 | /* if not at end of file, alert read thread to load next buffer, |
| 1481 | alternate between in_buf and in_buf2 */ |
| 1482 | if (in_len == BUF) { |
| 1483 | in_which = 1 - in_which; |
| 1484 | possess(load_state); |
| 1485 | twist(load_state, TO, 1); |
| 1486 | } |
| 1487 | |
| 1488 | /* at end of file -- join read thread (already exited), clean up */ |
| 1489 | else { |
| 1490 | join(load_thread); |
| 1491 | free_lock(load_state); |
| 1492 | in_which = -1; |
| 1493 | } |
| 1494 | } |
| 1495 | else |
| 1496 | #endif |
| 1497 | { |
| 1498 | /* don't use threads -- simply read a buffer into in_buf */ |
| 1499 | in_left = readn(ind, in_next = in_buf, BUF); |
| 1500 | } |
| 1501 | |
| 1502 | /* note end of file */ |
| 1503 | if (in_left < BUF) { |
| 1504 | in_short = 1; |
| 1505 | |
| 1506 | /* if we got bupkis, now is the time to mark eof */ |
| 1507 | if (in_left == 0) |
| 1508 | in_eof = 1; |
| 1509 | } |
| 1510 | |
| 1511 | /* update the total and return the available bytes */ |
| 1512 | in_tot += in_left; |
| 1513 | return in_left; |
| 1514 | } |
| 1515 | |
| 1516 | /* initialize for reading new input */ |
| 1517 | local void in_init(void) |
| 1518 | { |
| 1519 | in_left = 0; |
| 1520 | in_eof = 0; |
| 1521 | in_short = 0; |
| 1522 | in_tot = 0; |
| 1523 | #ifndef NOTHREAD |
| 1524 | in_which = -1; |
| 1525 | #endif |
| 1526 | } |
| 1527 | |
| 1528 | /* buffered reading macros for decompression and listing */ |
| 1529 | #define GET() (in_eof || (in_left == 0 && load() == 0) ? EOF : \ |
| 1530 | (in_left--, *in_next++)) |
| 1531 | #define GET2() (tmp2 = GET(), tmp2 + (GET() << 8)) |
| 1532 | #define GET4() (tmp4 = GET2(), tmp4 + ((unsigned long)(GET2()) << 16)) |
| 1533 | #define SKIP(dist) \ |
| 1534 | do { \ |
| 1535 | size_t togo = (dist); \ |
| 1536 | while (togo > in_left) { \ |
| 1537 | togo -= in_left; \ |
| 1538 | if (load() == 0) \ |
| 1539 | return -1; \ |
| 1540 | } \ |
| 1541 | in_left -= togo; \ |
| 1542 | in_next += togo; \ |
| 1543 | } while (0) |
| 1544 | |
| 1545 | /* convert MS-DOS date and time to a Unix time, assuming current timezone |
| 1546 | (you got a better idea?) */ |
| 1547 | local time_t dos2time(unsigned long dos) |
| 1548 | { |
| 1549 | struct tm tm; |
| 1550 | |
| 1551 | if (dos == 0) |
| 1552 | return time(NULL); |
| 1553 | tm.tm_year = ((int)(dos >> 25) & 0x7f) + 80; |
| 1554 | tm.tm_mon = ((int)(dos >> 21) & 0xf) - 1; |
| 1555 | tm.tm_mday = (int)(dos >> 16) & 0x1f; |
| 1556 | tm.tm_hour = (int)(dos >> 11) & 0x1f; |
| 1557 | tm.tm_min = (int)(dos >> 5) & 0x3f; |
| 1558 | tm.tm_sec = (int)(dos << 1) & 0x3e; |
| 1559 | tm.tm_isdst = -1; /* figure out if DST or not */ |
| 1560 | return mktime(&tm); |
| 1561 | } |
| 1562 | |
| 1563 | /* convert an unsigned 32-bit integer to signed, even if long > 32 bits */ |
| 1564 | local long tolong(unsigned long val) |
| 1565 | { |
| 1566 | return (long)(val & 0x7fffffffUL) - (long)(val & 0x80000000UL); |
| 1567 | } |
| 1568 | |
| 1569 | #define LOW32 0xffffffffUL |
| 1570 | |
| 1571 | /* process zip extra field to extract zip64 lengths and Unix mod time */ |
| 1572 | local int read_extra(unsigned len, int save) |
| 1573 | { |
| 1574 | unsigned id, size, tmp2; |
| 1575 | unsigned long tmp4; |
| 1576 | |
| 1577 | /* process extra blocks */ |
| 1578 | while (len >= 4) { |
| 1579 | id = GET2(); |
| 1580 | size = GET2(); |
| 1581 | if (in_eof) |
| 1582 | return -1; |
| 1583 | len -= 4; |
| 1584 | if (size > len) |
| 1585 | break; |
| 1586 | len -= size; |
| 1587 | if (id == 0x0001) { |
| 1588 | /* Zip64 Extended Information Extra Field */ |
| 1589 | if (zip_ulen == LOW32 && size >= 8) { |
| 1590 | zip_ulen = GET4(); |
| 1591 | SKIP(4); |
| 1592 | size -= 8; |
| 1593 | } |
| 1594 | if (zip_clen == LOW32 && size >= 8) { |
| 1595 | zip_clen = GET4(); |
| 1596 | SKIP(4); |
| 1597 | size -= 8; |
| 1598 | } |
| 1599 | } |
| 1600 | if (save) { |
| 1601 | if ((id == 0x000d || id == 0x5855) && size >= 8) { |
| 1602 | /* PKWare Unix or Info-ZIP Type 1 Unix block */ |
| 1603 | SKIP(4); |
| 1604 | stamp = tolong(GET4()); |
| 1605 | size -= 8; |
| 1606 | } |
| 1607 | if (id == 0x5455 && size >= 5) { |
| 1608 | /* Extended Timestamp block */ |
| 1609 | size--; |
| 1610 | if (GET() & 1) { |
| 1611 | stamp = tolong(GET4()); |
| 1612 | size -= 4; |
| 1613 | } |
| 1614 | } |
| 1615 | } |
| 1616 | SKIP(size); |
| 1617 | } |
| 1618 | SKIP(len); |
| 1619 | return 0; |
| 1620 | } |
| 1621 | |
| 1622 | /* read a gzip, zip, zlib, or lzw header from ind and extract useful |
| 1623 | information, return the method -- or on error return negative: -1 is |
| 1624 | immediate EOF, -2 is not a recognized compressed format, -3 is premature EOF |
| 1625 | within the header, -4 is unexpected header flag values; a method of 256 is |
| 1626 | lzw -- set form to indicate gzip, zlib, or zip */ |
| 1627 | local int get_header(int save) |
| 1628 | { |
| 1629 | unsigned magic; /* magic header */ |
| 1630 | int method; /* compression method */ |
| 1631 | int flags; /* header flags */ |
| 1632 | unsigned fname, extra; /* name and extra field lengths */ |
| 1633 | unsigned tmp2; /* for macro */ |
| 1634 | unsigned long tmp4; /* for macro */ |
| 1635 | |
| 1636 | /* clear return information */ |
| 1637 | if (save) { |
| 1638 | stamp = 0; |
| 1639 | RELEASE(hname); |
| 1640 | } |
| 1641 | |
| 1642 | /* see if it's a gzip, zlib, or lzw file */ |
| 1643 | form = 0; |
| 1644 | magic = GET() << 8; |
| 1645 | if (in_eof) |
| 1646 | return -1; |
| 1647 | magic += GET(); |
| 1648 | if (in_eof) |
| 1649 | return -2; |
| 1650 | if (magic % 31 == 0) { /* it's zlib */ |
| 1651 | form = 1; |
| 1652 | return (int)((magic >> 8) & 0xf); |
| 1653 | } |
| 1654 | if (magic == 0x1f9d) /* it's lzw */ |
| 1655 | return 256; |
| 1656 | if (magic == 0x504b) { /* it's zip */ |
| 1657 | if (GET() != 3 || GET() != 4) |
| 1658 | return -3; |
| 1659 | SKIP(2); |
| 1660 | flags = GET2(); |
| 1661 | if (in_eof) |
| 1662 | return -3; |
| 1663 | if (flags & 0xfff0) |
| 1664 | return -4; |
| 1665 | method = GET2(); |
| 1666 | if (flags & 1) /* encrypted */ |
| 1667 | method = 255; /* mark as unknown method */ |
| 1668 | if (in_eof) |
| 1669 | return -3; |
| 1670 | if (save) |
| 1671 | stamp = dos2time(GET4()); |
| 1672 | else |
| 1673 | SKIP(4); |
| 1674 | zip_crc = GET4(); |
| 1675 | zip_clen = GET4(); |
| 1676 | zip_ulen = GET4(); |
| 1677 | fname = GET2(); |
| 1678 | extra = GET2(); |
| 1679 | if (save) { |
| 1680 | char *next = hname = malloc(fname + 1); |
| 1681 | if (hname == NULL) |
| 1682 | bail("not enough memory", ""); |
| 1683 | while (fname > in_left) { |
| 1684 | memcpy(next, in_next, in_left); |
| 1685 | fname -= in_left; |
| 1686 | next += in_left; |
| 1687 | if (load() == 0) |
| 1688 | return -3; |
| 1689 | } |
| 1690 | memcpy(next, in_next, fname); |
| 1691 | in_left -= fname; |
| 1692 | in_next += fname; |
| 1693 | next += fname; |
| 1694 | *next = 0; |
| 1695 | } |
| 1696 | else |
| 1697 | SKIP(fname); |
| 1698 | read_extra(extra, save); |
| 1699 | form = 2 + ((flags & 8) >> 3); |
| 1700 | return in_eof ? -3 : method; |
| 1701 | } |
| 1702 | if (magic != 0x1f8b) /* not gzip */ |
| 1703 | return -2; |
| 1704 | |
| 1705 | /* it's gzip -- get method and flags */ |
| 1706 | method = GET(); |
| 1707 | flags = GET(); |
| 1708 | if (in_eof) |
| 1709 | return -1; |
| 1710 | if (flags & 0xe0) |
| 1711 | return -4; |
| 1712 | |
| 1713 | /* get time stamp */ |
| 1714 | if (save) |
| 1715 | stamp = tolong(GET4()); |
| 1716 | else |
| 1717 | SKIP(4); |
| 1718 | |
| 1719 | /* skip extra field and OS */ |
| 1720 | SKIP(2); |
| 1721 | |
| 1722 | /* skip extra field, if present */ |
| 1723 | if (flags & 4) { |
| 1724 | extra = GET2(); |
| 1725 | if (in_eof) |
| 1726 | return -3; |
| 1727 | SKIP(extra); |
| 1728 | } |
| 1729 | |
| 1730 | /* read file name, if present, into allocated memory */ |
| 1731 | if ((flags & 8) && save) { |
| 1732 | unsigned char *end; |
| 1733 | size_t copy, have, size = 128; |
| 1734 | hname = malloc(size); |
| 1735 | if (hname == NULL) |
| 1736 | bail("not enough memory", ""); |
| 1737 | have = 0; |
| 1738 | do { |
| 1739 | if (in_left == 0 && load() == 0) |
| 1740 | return -3; |
| 1741 | end = memchr(in_next, 0, in_left); |
| 1742 | copy = end == NULL ? in_left : (size_t)(end - in_next) + 1; |
| 1743 | if (have + copy > size) { |
| 1744 | while (have + copy > (size <<= 1)) |
| 1745 | ; |
| 1746 | hname = realloc(hname, size); |
| 1747 | if (hname == NULL) |
| 1748 | bail("not enough memory", ""); |
| 1749 | } |
| 1750 | memcpy(hname + have, in_next, copy); |
| 1751 | have += copy; |
| 1752 | in_left -= copy; |
| 1753 | in_next += copy; |
| 1754 | } while (end == NULL); |
| 1755 | } |
| 1756 | else if (flags & 8) |
| 1757 | while (GET() != 0) |
| 1758 | if (in_eof) |
| 1759 | return -3; |
| 1760 | |
| 1761 | /* skip comment */ |
| 1762 | if (flags & 16) |
| 1763 | while (GET() != 0) |
| 1764 | if (in_eof) |
| 1765 | return -3; |
| 1766 | |
| 1767 | /* skip header crc */ |
| 1768 | if (flags & 2) |
| 1769 | SKIP(2); |
| 1770 | |
| 1771 | /* return compression method */ |
| 1772 | return method; |
| 1773 | } |
| 1774 | |
| 1775 | /* --- list contents of compressed input (gzip, zlib, or lzw) */ |
| 1776 | |
| 1777 | /* find standard compressed file suffix, return length of suffix */ |
| 1778 | local size_t compressed_suffix(char *nm) |
| 1779 | { |
| 1780 | size_t len; |
| 1781 | |
| 1782 | len = strlen(nm); |
| 1783 | if (len > 4) { |
| 1784 | nm += len - 4; |
| 1785 | len = 4; |
| 1786 | if (strcmp(nm, ".zip") == 0 || strcmp(nm, ".ZIP") == 0 || |
| 1787 | strcmp(nm, ".tgz") == 0) |
| 1788 | return 4; |
| 1789 | } |
| 1790 | if (len > 3) { |
| 1791 | nm += len - 3; |
| 1792 | len = 3; |
| 1793 | if (strcmp(nm, ".gz") == 0 || strcmp(nm, "-gz") == 0 || |
| 1794 | strcmp(nm, ".zz") == 0 || strcmp(nm, "-zz") == 0) |
| 1795 | return 3; |
| 1796 | } |
| 1797 | if (len > 2) { |
| 1798 | nm += len - 2; |
| 1799 | if (strcmp(nm, ".z") == 0 || strcmp(nm, "-z") == 0 || |
| 1800 | strcmp(nm, "_z") == 0 || strcmp(nm, ".Z") == 0) |
| 1801 | return 2; |
| 1802 | } |
| 1803 | return 0; |
| 1804 | } |
| 1805 | |
| 1806 | /* listing file name lengths for -l and -lv */ |
| 1807 | #define NAMEMAX1 48 /* name display limit at verbosity 1 */ |
| 1808 | #define NAMEMAX2 16 /* name display limit at verbosity 2 */ |
| 1809 | |
| 1810 | /* print gzip or lzw file information */ |
| 1811 | local void show_info(int method, unsigned long check, off_t len, int cont) |
| 1812 | { |
| 1813 | size_t max; /* maximum name length for current verbosity */ |
| 1814 | size_t n; /* name length without suffix */ |
| 1815 | time_t now; /* for getting current year */ |
| 1816 | char mod[26]; /* modification time in text */ |
| 1817 | char name[NAMEMAX1+1]; /* header or file name, possibly truncated */ |
| 1818 | |
| 1819 | /* create abbreviated name from header file name or actual file name */ |
| 1820 | max = verbosity > 1 ? NAMEMAX2 : NAMEMAX1; |
| 1821 | memset(name, 0, max + 1); |
| 1822 | if (cont) |
| 1823 | strncpy(name, "<...>", max + 1); |
| 1824 | else if (hname == NULL) { |
| 1825 | n = strlen(in) - compressed_suffix(in); |
| 1826 | strncpy(name, in, n > max + 1 ? max + 1 : n); |
| 1827 | } |
| 1828 | else |
| 1829 | strncpy(name, hname, max + 1); |
| 1830 | if (name[max]) |
| 1831 | strcpy(name + max - 3, "..."); |
| 1832 | |
| 1833 | /* convert time stamp to text */ |
| 1834 | if (stamp) { |
| 1835 | strcpy(mod, ctime(&stamp)); |
| 1836 | now = time(NULL); |
| 1837 | if (strcmp(mod + 20, ctime(&now) + 20) != 0) |
| 1838 | strcpy(mod + 11, mod + 19); |
| 1839 | } |
| 1840 | else |
| 1841 | strcpy(mod + 4, "------ -----"); |
| 1842 | mod[16] = 0; |
| 1843 | |
| 1844 | /* if first time, print header */ |
| 1845 | if (first) { |
| 1846 | if (verbosity > 1) |
| 1847 | fputs("method check timestamp ", stdout); |
| 1848 | if (verbosity > 0) |
| 1849 | puts("compressed original reduced name"); |
| 1850 | first = 0; |
| 1851 | } |
| 1852 | |
| 1853 | /* print information */ |
| 1854 | if (verbosity > 1) { |
| 1855 | if (form == 3 && !decode) |
| 1856 | printf("zip%3d -------- %s ", method, mod + 4); |
| 1857 | else if (form > 1) |
| 1858 | printf("zip%3d %08lx %s ", method, check, mod + 4); |
| 1859 | else if (form) |
| 1860 | printf("zlib%2d %08lx %s ", method, check, mod + 4); |
| 1861 | else if (method == 256) |
| 1862 | printf("lzw -------- %s ", mod + 4); |
| 1863 | else |
| 1864 | printf("gzip%2d %08lx %s ", method, check, mod + 4); |
| 1865 | } |
| 1866 | if (verbosity > 0) { |
| 1867 | if ((form == 3 && !decode) || |
| 1868 | (method == 8 && in_tot > (len + (len >> 10) + 12)) || |
| 1869 | (method == 256 && in_tot > len + (len >> 1) + 3)) |
| 1870 | printf(sizeof(off_t) == 4 ? "%10lu %10lu? unk %s\n" : |
| 1871 | "%10llu %10llu? unk %s\n", |
| 1872 | in_tot, len, name); |
| 1873 | else |
| 1874 | printf(sizeof(off_t) == 4 ? "%10lu %10lu %6.1f%% %s\n" : |
| 1875 | "%10llu %10llu %6.1f%% %s\n", |
| 1876 | in_tot, len, |
| 1877 | len == 0 ? 0 : 100 * (len - in_tot)/(double)len, |
| 1878 | name); |
| 1879 | } |
| 1880 | } |
| 1881 | |
| 1882 | /* list content information about the gzip file at ind (only works if the gzip |
| 1883 | file contains a single gzip stream with no junk at the end, and only works |
| 1884 | well if the uncompressed length is less than 4 GB) */ |
| 1885 | local void list_info(void) |
| 1886 | { |
| 1887 | int method; /* get_header() return value */ |
| 1888 | size_t n; /* available trailer bytes */ |
| 1889 | off_t at; /* used to calculate compressed length */ |
| 1890 | unsigned char tail[8]; /* trailer containing check and length */ |
| 1891 | unsigned long check, len; /* check value and length from trailer */ |
| 1892 | |
| 1893 | /* initialize input buffer */ |
| 1894 | in_init(); |
| 1895 | |
| 1896 | /* read header information and position input after header */ |
| 1897 | method = get_header(1); |
| 1898 | if (method < 0) { |
| 1899 | RELEASE(hname); |
| 1900 | if (method != -1 && verbosity > 1) |
| 1901 | fprintf(stderr, "%s not a compressed file -- skipping\n", in); |
| 1902 | return; |
| 1903 | } |
| 1904 | |
| 1905 | /* list zip file */ |
| 1906 | if (form > 1) { |
| 1907 | in_tot = zip_clen; |
| 1908 | show_info(method, zip_crc, zip_ulen, 0); |
| 1909 | return; |
| 1910 | } |
| 1911 | |
| 1912 | /* list zlib file */ |
| 1913 | if (form) { |
| 1914 | at = lseek(ind, 0, SEEK_END); |
| 1915 | if (at == -1) { |
| 1916 | check = 0; |
| 1917 | do { |
| 1918 | len = in_left < 4 ? in_left : 4; |
| 1919 | in_next += in_left - len; |
| 1920 | while (len--) |
| 1921 | check = (check << 8) + *in_next++; |
| 1922 | } while (load() != 0); |
| 1923 | check &= LOW32; |
| 1924 | } |
| 1925 | else { |
| 1926 | in_tot = at; |
| 1927 | lseek(ind, -4, SEEK_END); |
| 1928 | readn(ind, tail, 4); |
| 1929 | check = (*tail << 24) + (tail[1] << 16) + (tail[2] << 8) + tail[3]; |
| 1930 | } |
| 1931 | in_tot -= 6; |
| 1932 | show_info(method, check, 0, 0); |
| 1933 | return; |
| 1934 | } |
| 1935 | |
| 1936 | /* list lzw file */ |
| 1937 | if (method == 256) { |
| 1938 | at = lseek(ind, 0, SEEK_END); |
| 1939 | if (at == -1) |
| 1940 | while (load() != 0) |
| 1941 | ; |
| 1942 | else |
| 1943 | in_tot = at; |
| 1944 | in_tot -= 3; |
| 1945 | show_info(method, 0, 0, 0); |
| 1946 | return; |
| 1947 | } |
| 1948 | |
| 1949 | /* skip to end to get trailer (8 bytes), compute compressed length */ |
| 1950 | if (in_short) { /* whole thing already read */ |
| 1951 | if (in_left < 8) { |
| 1952 | if (verbosity > 0) |
| 1953 | fprintf(stderr, "%s not a valid gzip file -- skipping\n", |
| 1954 | in); |
| 1955 | return; |
| 1956 | } |
| 1957 | in_tot = in_left - 8; /* compressed size */ |
| 1958 | memcpy(tail, in_next + (in_left - 8), 8); |
| 1959 | } |
| 1960 | else if ((at = lseek(ind, -8, SEEK_END)) != -1) { |
| 1961 | in_tot = at - in_tot + in_left; /* compressed size */ |
| 1962 | readn(ind, tail, 8); /* get trailer */ |
| 1963 | } |
| 1964 | else { /* can't seek */ |
| 1965 | at = in_tot - in_left; /* save header size */ |
| 1966 | do { |
| 1967 | n = in_left < 8 ? in_left : 8; |
| 1968 | memcpy(tail, in_next + (in_left - n), n); |
| 1969 | load(); |
| 1970 | } while (in_left == BUF); /* read until end */ |
| 1971 | if (in_left < 8) { |
| 1972 | if (n + in_left < 8) { |
| 1973 | if (verbosity > 0) |
| 1974 | fprintf(stderr, "%s not a valid gzip file -- skipping\n", |
| 1975 | in); |
| 1976 | return; |
| 1977 | } |
| 1978 | if (in_left) { |
| 1979 | if (n + in_left > 8) |
| 1980 | memcpy(tail, tail + n - (8 - in_left), 8 - in_left); |
| 1981 | memcpy(tail + 8 - in_left, in_next, in_left); |
| 1982 | } |
| 1983 | } |
| 1984 | else |
| 1985 | memcpy(tail, in_next + (in_left - 8), 8); |
| 1986 | in_tot -= at + 8; |
| 1987 | } |
| 1988 | if (in_tot < 2) { |
| 1989 | if (verbosity > 0) |
| 1990 | fprintf(stderr, "%s not a valid gzip file -- skipping\n", in); |
| 1991 | return; |
| 1992 | } |
| 1993 | |
| 1994 | /* convert trailer to check and uncompressed length (modulo 2^32) */ |
| 1995 | check = tail[0] + (tail[1] << 8) + (tail[2] << 16) + (tail[3] << 24); |
| 1996 | len = tail[4] + (tail[5] << 8) + (tail[6] << 16) + (tail[7] << 24); |
| 1997 | |
| 1998 | /* list information about contents */ |
| 1999 | show_info(method, check, len, 0); |
| 2000 | RELEASE(hname); |
| 2001 | } |
| 2002 | |
| 2003 | /* --- decompress deflate input --- */ |
| 2004 | |
| 2005 | /* call-back input function for inflateBack() */ |
| 2006 | local unsigned inb(void *desc, unsigned char **buf) |
| 2007 | { |
| 2008 | (void)desc; |
| 2009 | load(); |
| 2010 | *buf = in_next; |
| 2011 | return in_left; |
| 2012 | } |
| 2013 | |
| 2014 | /* output buffers and window for infchk() and unlzw() */ |
| 2015 | #define OUTSIZE 32768U /* must be at least 32K for inflateBack() window */ |
| 2016 | local unsigned char out_buf[OUTSIZE]; |
| 2017 | |
| 2018 | #ifndef NOTHREAD |
| 2019 | /* output data for parallel write and check */ |
| 2020 | local unsigned char out_copy[OUTSIZE]; |
| 2021 | local size_t out_len; |
| 2022 | |
| 2023 | /* outb threads states */ |
| 2024 | local lock *outb_write_more = NULL; |
| 2025 | local lock *outb_check_more; |
| 2026 | |
| 2027 | /* output write thread */ |
| 2028 | local void outb_write(void *dummy) |
| 2029 | { |
| 2030 | size_t len; |
| 2031 | |
| 2032 | (void)dummy; |
| 2033 | |
| 2034 | Trace(("-- launched decompress write thread")); |
| 2035 | do { |
| 2036 | possess(outb_write_more); |
| 2037 | wait_for(outb_write_more, TO_BE, 1); |
| 2038 | len = out_len; |
| 2039 | if (len && decode == 1) |
| 2040 | writen(outd, out_copy, len); |
| 2041 | Trace(("-- decompress wrote %lu bytes", len)); |
| 2042 | twist(outb_write_more, TO, 0); |
| 2043 | } while (len); |
| 2044 | Trace(("-- exited decompress write thread")); |
| 2045 | } |
| 2046 | |
| 2047 | /* output check thread */ |
| 2048 | local void outb_check(void *dummy) |
| 2049 | { |
| 2050 | size_t len; |
| 2051 | |
| 2052 | (void)dummy; |
| 2053 | |
| 2054 | Trace(("-- launched decompress check thread")); |
| 2055 | do { |
| 2056 | possess(outb_check_more); |
| 2057 | wait_for(outb_check_more, TO_BE, 1); |
| 2058 | len = out_len; |
| 2059 | out_check = CHECK(out_check, out_copy, len); |
| 2060 | Trace(("-- decompress checked %lu bytes", len)); |
| 2061 | twist(outb_check_more, TO, 0); |
| 2062 | } while (len); |
| 2063 | Trace(("-- exited decompress check thread")); |
| 2064 | } |
| 2065 | #endif |
| 2066 | |
| 2067 | /* call-back output function for inflateBack() -- wait for the last write and |
| 2068 | check calculation to complete, copy the write buffer, and then alert the |
| 2069 | write and check threads and return for more decompression while that's |
| 2070 | going on (or just write and check if no threads or if proc == 1) */ |
| 2071 | local int outb(void *desc, unsigned char *buf, unsigned len) |
| 2072 | { |
| 2073 | #ifndef NOTHREAD |
| 2074 | static thread *wr, *ch; |
| 2075 | |
| 2076 | (void)desc; |
| 2077 | |
| 2078 | if (procs > 1) { |
| 2079 | /* if first time, initialize state and launch threads */ |
| 2080 | if (outb_write_more == NULL) { |
| 2081 | outb_write_more = new_lock(0); |
| 2082 | outb_check_more = new_lock(0); |
| 2083 | wr = launch(outb_write, NULL); |
| 2084 | ch = launch(outb_check, NULL); |
| 2085 | } |
| 2086 | |
| 2087 | /* wait for previous write and check threads to complete */ |
| 2088 | possess(outb_check_more); |
| 2089 | wait_for(outb_check_more, TO_BE, 0); |
| 2090 | possess(outb_write_more); |
| 2091 | wait_for(outb_write_more, TO_BE, 0); |
| 2092 | |
| 2093 | /* copy the output and alert the worker bees */ |
| 2094 | out_len = len; |
| 2095 | out_tot += len; |
| 2096 | memcpy(out_copy, buf, len); |
| 2097 | twist(outb_write_more, TO, 1); |
| 2098 | twist(outb_check_more, TO, 1); |
| 2099 | |
| 2100 | /* if requested with len == 0, clean up -- terminate and join write and |
| 2101 | check threads, free lock */ |
| 2102 | if (len == 0) { |
| 2103 | join(ch); |
| 2104 | join(wr); |
| 2105 | free_lock(outb_check_more); |
| 2106 | free_lock(outb_write_more); |
| 2107 | outb_write_more = NULL; |
| 2108 | } |
| 2109 | |
| 2110 | /* return for more decompression while last buffer is being written |
| 2111 | and having its check value calculated -- we wait for those to finish |
| 2112 | the next time this function is called */ |
| 2113 | return 0; |
| 2114 | } |
| 2115 | #endif |
| 2116 | |
| 2117 | /* if just one process or no threads, then do it without threads */ |
| 2118 | if (len) { |
| 2119 | if (decode == 1) |
| 2120 | writen(outd, buf, len); |
| 2121 | out_check = CHECK(out_check, buf, len); |
| 2122 | out_tot += len; |
| 2123 | } |
| 2124 | return 0; |
| 2125 | } |
| 2126 | |
| 2127 | /* inflate for decompression or testing -- decompress from ind to outd unless |
| 2128 | decode != 1, in which case just test ind, and then also list if list != 0; |
| 2129 | look for and decode multiple, concatenated gzip and/or zlib streams; |
| 2130 | read and check the gzip, zlib, or zip trailer */ |
| 2131 | local void infchk(void) |
| 2132 | { |
| 2133 | int ret, cont; |
| 2134 | unsigned long check, len; |
| 2135 | z_stream strm; |
| 2136 | unsigned tmp2; |
| 2137 | unsigned long tmp4; |
| 2138 | off_t clen; |
| 2139 | |
| 2140 | cont = 0; |
| 2141 | do { |
| 2142 | /* header already read -- set up for decompression */ |
| 2143 | in_tot = in_left; /* track compressed data length */ |
| 2144 | out_tot = 0; |
| 2145 | out_check = CHECK(0L, Z_NULL, 0); |
| 2146 | strm.zalloc = Z_NULL; |
| 2147 | strm.zfree = Z_NULL; |
| 2148 | strm.opaque = Z_NULL; |
| 2149 | ret = inflateBackInit(&strm, 15, out_buf); |
| 2150 | if (ret != Z_OK) |
| 2151 | bail("not enough memory", ""); |
| 2152 | |
| 2153 | /* decompress, compute lengths and check value */ |
| 2154 | strm.avail_in = in_left; |
| 2155 | strm.next_in = in_next; |
| 2156 | ret = inflateBack(&strm, inb, NULL, outb, NULL); |
| 2157 | if (ret != Z_STREAM_END) |
| 2158 | bail("corrupted input -- invalid deflate data: ", in); |
| 2159 | in_left = strm.avail_in; |
| 2160 | in_next = strm.next_in; |
| 2161 | inflateBackEnd(&strm); |
| 2162 | outb(NULL, NULL, 0); /* finish off final write and check */ |
| 2163 | |
| 2164 | /* compute compressed data length */ |
| 2165 | clen = in_tot - in_left; |
| 2166 | |
| 2167 | /* read and check trailer */ |
| 2168 | if (form > 1) { /* zip local trailer (if any) */ |
| 2169 | if (form == 3) { /* data descriptor follows */ |
| 2170 | /* read original version of data descriptor*/ |
| 2171 | zip_crc = GET4(); |
| 2172 | zip_clen = GET4(); |
| 2173 | zip_ulen = GET4(); |
| 2174 | if (in_eof) |
| 2175 | bail("corrupted zip entry -- missing trailer: ", in); |
| 2176 | |
| 2177 | /* if crc doesn't match, try info-zip variant with sig */ |
| 2178 | if (zip_crc != out_check) { |
| 2179 | if (zip_crc != 0x08074b50UL || zip_clen != out_check) |
| 2180 | bail("corrupted zip entry -- crc32 mismatch: ", in); |
| 2181 | zip_crc = zip_clen; |
| 2182 | zip_clen = zip_ulen; |
| 2183 | zip_ulen = GET4(); |
| 2184 | } |
| 2185 | |
| 2186 | /* if second length doesn't match, try 64-bit lengths */ |
| 2187 | if (zip_ulen != (out_tot & LOW32)) { |
| 2188 | zip_ulen = GET4(); |
| 2189 | (void)GET4(); |
| 2190 | } |
| 2191 | if (in_eof) |
| 2192 | bail("corrupted zip entry -- missing trailer: ", in); |
| 2193 | } |
| 2194 | if (zip_clen != (clen & LOW32) || zip_ulen != (out_tot & LOW32)) |
| 2195 | bail("corrupted zip entry -- length mismatch: ", in); |
| 2196 | check = zip_crc; |
| 2197 | } |
| 2198 | else if (form == 1) { /* zlib (big-endian) trailer */ |
| 2199 | check = (unsigned long)(GET()) << 24; |
| 2200 | check += (unsigned long)(GET()) << 16; |
| 2201 | check += GET() << 8; |
| 2202 | check += GET(); |
| 2203 | if (in_eof) |
| 2204 | bail("corrupted zlib stream -- missing trailer: ", in); |
| 2205 | if (check != out_check) |
| 2206 | bail("corrupted zlib stream -- adler32 mismatch: ", in); |
| 2207 | } |
| 2208 | else { /* gzip trailer */ |
| 2209 | check = GET4(); |
| 2210 | len = GET4(); |
| 2211 | if (in_eof) |
| 2212 | bail("corrupted gzip stream -- missing trailer: ", in); |
| 2213 | if (check != out_check) |
| 2214 | bail("corrupted gzip stream -- crc32 mismatch: ", in); |
| 2215 | if (len != (out_tot & LOW32)) |
| 2216 | bail("corrupted gzip stream -- length mismatch: ", in); |
| 2217 | } |
| 2218 | |
| 2219 | /* show file information if requested */ |
| 2220 | if (list) { |
| 2221 | in_tot = clen; |
| 2222 | show_info(8, check, out_tot, cont); |
| 2223 | cont = 1; |
| 2224 | } |
| 2225 | |
| 2226 | /* if a gzip or zlib entry follows a gzip or zlib entry, decompress it |
| 2227 | (don't replace saved header information from first entry) */ |
| 2228 | } while (form < 2 && (ret = get_header(0)) == 8 && form < 2); |
| 2229 | if (ret != -1 && form < 2) |
| 2230 | fprintf(stderr, "%s OK, has trailing junk which was ignored\n", in); |
| 2231 | } |
| 2232 | |
| 2233 | /* --- decompress Unix compress (LZW) input --- */ |
| 2234 | |
| 2235 | /* memory for unlzw() -- |
| 2236 | the first 256 entries of prefix[] and suffix[] are never used, could |
| 2237 | have offset the index, but it's faster to waste the memory */ |
| 2238 | unsigned short prefix[65536]; /* index to LZW prefix string */ |
| 2239 | unsigned char suffix[65536]; /* one-character LZW suffix */ |
| 2240 | unsigned char match[65280 + 2]; /* buffer for reversed match */ |
| 2241 | |
| 2242 | /* throw out what's left in the current bits byte buffer (this is a vestigial |
| 2243 | aspect of the compressed data format derived from an implementation that |
| 2244 | made use of a special VAX machine instruction!) */ |
| 2245 | #define FLUSHCODE() \ |
| 2246 | do { \ |
| 2247 | left = 0; \ |
| 2248 | rem = 0; \ |
| 2249 | if (chunk > in_left) { \ |
| 2250 | chunk -= in_left; \ |
| 2251 | if (load() == 0) \ |
| 2252 | break; \ |
| 2253 | if (chunk > in_left) { \ |
| 2254 | chunk = in_left = 0; \ |
| 2255 | break; \ |
| 2256 | } \ |
| 2257 | } \ |
| 2258 | in_left -= chunk; \ |
| 2259 | in_next += chunk; \ |
| 2260 | chunk = 0; \ |
| 2261 | } while (0) |
| 2262 | |
| 2263 | /* Decompress a compress (LZW) file from ind to outd. The compress magic |
| 2264 | header (two bytes) has already been read and verified. */ |
| 2265 | local void unlzw(void) |
| 2266 | { |
| 2267 | int got; /* byte just read by GET() */ |
| 2268 | unsigned chunk; /* bytes left in current chunk */ |
| 2269 | int left; /* bits left in rem */ |
| 2270 | unsigned rem; /* unused bits from input */ |
| 2271 | int bits; /* current bits per code */ |
| 2272 | unsigned code; /* code, table traversal index */ |
| 2273 | unsigned mask; /* mask for current bits codes */ |
| 2274 | int max; /* maximum bits per code for this stream */ |
| 2275 | int flags; /* compress flags, then block compress flag */ |
| 2276 | unsigned end; /* last valid entry in prefix/suffix tables */ |
| 2277 | unsigned temp; /* current code */ |
| 2278 | unsigned prev; /* previous code */ |
| 2279 | unsigned final; /* last character written for previous code */ |
| 2280 | unsigned stack; /* next position for reversed string */ |
| 2281 | unsigned outcnt; /* bytes in output buffer */ |
| 2282 | unsigned char *p; |
| 2283 | |
| 2284 | /* process remainder of compress header -- a flags byte */ |
| 2285 | out_tot = 0; |
| 2286 | flags = GET(); |
| 2287 | if (in_eof) |
| 2288 | bail("missing lzw data: ", in); |
| 2289 | if (flags & 0x60) |
| 2290 | bail("unknown lzw flags set: ", in); |
| 2291 | max = flags & 0x1f; |
| 2292 | if (max < 9 || max > 16) |
| 2293 | bail("lzw bits out of range: ", in); |
| 2294 | if (max == 9) /* 9 doesn't really mean 9 */ |
| 2295 | max = 10; |
| 2296 | flags &= 0x80; /* true if block compress */ |
| 2297 | |
| 2298 | /* clear table */ |
| 2299 | bits = 9; |
| 2300 | mask = 0x1ff; |
| 2301 | end = flags ? 256 : 255; |
| 2302 | |
| 2303 | /* set up: get first 9-bit code, which is the first decompressed byte, but |
| 2304 | don't create a table entry until the next code */ |
| 2305 | got = GET(); |
| 2306 | if (in_eof) /* no compressed data is ok */ |
| 2307 | return; |
| 2308 | final = prev = (unsigned)got; /* low 8 bits of code */ |
| 2309 | got = GET(); |
| 2310 | if (in_eof || (got & 1) != 0) /* missing a bit or code >= 256 */ |
| 2311 | bail("invalid lzw code: ", in); |
| 2312 | rem = (unsigned)got >> 1; /* remaining 7 bits */ |
| 2313 | left = 7; |
| 2314 | chunk = bits - 2; /* 7 bytes left in this chunk */ |
| 2315 | out_buf[0] = (unsigned char)final; /* write first decompressed byte */ |
| 2316 | outcnt = 1; |
| 2317 | |
| 2318 | /* decode codes */ |
| 2319 | stack = 0; |
| 2320 | for (;;) { |
| 2321 | /* if the table will be full after this, increment the code size */ |
| 2322 | if (end >= mask && bits < max) { |
| 2323 | FLUSHCODE(); |
| 2324 | bits++; |
| 2325 | mask <<= 1; |
| 2326 | mask++; |
| 2327 | } |
| 2328 | |
| 2329 | /* get a code of length bits */ |
| 2330 | if (chunk == 0) /* decrement chunk modulo bits */ |
| 2331 | chunk = bits; |
| 2332 | code = rem; /* low bits of code */ |
| 2333 | got = GET(); |
| 2334 | if (in_eof) { /* EOF is end of compressed data */ |
| 2335 | /* write remaining buffered output */ |
| 2336 | out_tot += outcnt; |
| 2337 | if (outcnt && decode == 1) |
| 2338 | writen(outd, out_buf, outcnt); |
| 2339 | return; |
| 2340 | } |
| 2341 | code += (unsigned)got << left; /* middle (or high) bits of code */ |
| 2342 | left += 8; |
| 2343 | chunk--; |
| 2344 | if (bits > left) { /* need more bits */ |
| 2345 | got = GET(); |
| 2346 | if (in_eof) /* can't end in middle of code */ |
| 2347 | bail("invalid lzw code: ", in); |
| 2348 | code += (unsigned)got << left; /* high bits of code */ |
| 2349 | left += 8; |
| 2350 | chunk--; |
| 2351 | } |
| 2352 | code &= mask; /* mask to current code length */ |
| 2353 | left -= bits; /* number of unused bits */ |
| 2354 | rem = (unsigned)got >> (8 - left); /* unused bits from last byte */ |
| 2355 | |
| 2356 | /* process clear code (256) */ |
| 2357 | if (code == 256 && flags) { |
| 2358 | FLUSHCODE(); |
| 2359 | bits = 9; /* initialize bits and mask */ |
| 2360 | mask = 0x1ff; |
| 2361 | end = 255; /* empty table */ |
| 2362 | continue; /* get next code */ |
| 2363 | } |
| 2364 | |
| 2365 | /* special code to reuse last match */ |
| 2366 | temp = code; /* save the current code */ |
| 2367 | if (code > end) { |
| 2368 | /* Be picky on the allowed code here, and make sure that the code |
| 2369 | we drop through (prev) will be a valid index so that random |
| 2370 | input does not cause an exception. The code != end + 1 check is |
| 2371 | empirically derived, and not checked in the original uncompress |
| 2372 | code. If this ever causes a problem, that check could be safely |
| 2373 | removed. Leaving this check in greatly improves pigz's ability |
| 2374 | to detect random or corrupted input after a compress header. |
| 2375 | In any case, the prev > end check must be retained. */ |
| 2376 | if (code != end + 1 || prev > end) |
| 2377 | bail("invalid lzw code: ", in); |
| 2378 | match[stack++] = (unsigned char)final; |
| 2379 | code = prev; |
| 2380 | } |
| 2381 | |
| 2382 | /* walk through linked list to generate output in reverse order */ |
| 2383 | p = match + stack; |
| 2384 | while (code >= 256) { |
| 2385 | *p++ = suffix[code]; |
| 2386 | code = prefix[code]; |
| 2387 | } |
| 2388 | stack = p - match; |
| 2389 | match[stack++] = (unsigned char)code; |
| 2390 | final = code; |
| 2391 | |
| 2392 | /* link new table entry */ |
| 2393 | if (end < mask) { |
| 2394 | end++; |
| 2395 | prefix[end] = (unsigned short)prev; |
| 2396 | suffix[end] = (unsigned char)final; |
| 2397 | } |
| 2398 | |
| 2399 | /* set previous code for next iteration */ |
| 2400 | prev = temp; |
| 2401 | |
| 2402 | /* write output in forward order */ |
| 2403 | while (stack > OUTSIZE - outcnt) { |
| 2404 | while (outcnt < OUTSIZE) |
| 2405 | out_buf[outcnt++] = match[--stack]; |
| 2406 | out_tot += outcnt; |
| 2407 | if (decode == 1) |
| 2408 | writen(outd, out_buf, outcnt); |
| 2409 | outcnt = 0; |
| 2410 | } |
| 2411 | p = match + stack; |
| 2412 | do { |
| 2413 | out_buf[outcnt++] = *--p; |
| 2414 | } while (p > match); |
| 2415 | stack = 0; |
| 2416 | |
| 2417 | /* loop for next code with final and prev as the last match, rem and |
| 2418 | left provide the first 0..7 bits of the next code, end is the last |
| 2419 | valid table entry */ |
| 2420 | } |
| 2421 | } |
| 2422 | |
| 2423 | /* --- file processing --- */ |
| 2424 | |
| 2425 | /* extract file name from path */ |
| 2426 | local char *justname(char *path) |
| 2427 | { |
| 2428 | char *p; |
| 2429 | |
| 2430 | p = path + strlen(path); |
| 2431 | while (--p >= path) |
| 2432 | if (*p == '/') |
| 2433 | break; |
| 2434 | return p + 1; |
| 2435 | } |
| 2436 | |
| 2437 | /* Copy file attributes, from -> to, as best we can. This is best effort, so |
| 2438 | no errors are reported. The mode bits, including suid, sgid, and the sticky |
| 2439 | bit are copied (if allowed), the owner's user id and group id are copied |
| 2440 | (again if allowed), and the access and modify times are copied. */ |
| 2441 | local void copymeta(char *from, char *to) |
| 2442 | { |
| 2443 | struct stat st; |
| 2444 | struct timeval times[2]; |
| 2445 | |
| 2446 | /* get all of from's Unix meta data, return if not a regular file */ |
| 2447 | if (stat(from, &st) != 0 || (st.st_mode & S_IFMT) != S_IFREG) |
| 2448 | return; |
| 2449 | |
| 2450 | /* set to's mode bits, ignore errors */ |
| 2451 | chmod(to, st.st_mode & 07777); |
| 2452 | |
| 2453 | /* copy owner's user and group, ignore errors */ |
| 2454 | chown(to, st.st_uid, st.st_gid); |
| 2455 | |
| 2456 | /* copy access and modify times, ignore errors */ |
| 2457 | times[0].tv_sec = st.st_atime; |
| 2458 | times[0].tv_usec = 0; |
| 2459 | times[1].tv_sec = st.st_mtime; |
| 2460 | times[1].tv_usec = 0; |
| 2461 | utimes(to, times); |
| 2462 | } |
| 2463 | |
| 2464 | /* set the access and modify times of fd to t */ |
| 2465 | local void touch(char *path, time_t t) |
| 2466 | { |
| 2467 | struct timeval times[2]; |
| 2468 | |
| 2469 | times[0].tv_sec = t; |
| 2470 | times[0].tv_usec = 0; |
| 2471 | times[1].tv_sec = t; |
| 2472 | times[1].tv_usec = 0; |
| 2473 | utimes(path, times); |
| 2474 | } |
| 2475 | |
| 2476 | /* process provided input file, or stdin if path is NULL -- process() can |
| 2477 | call itself for recursive directory processing */ |
| 2478 | local void process(char *path) |
| 2479 | { |
| 2480 | int method = -1; /* get_header() return value */ |
| 2481 | size_t len; /* length of base name (minus suffix) */ |
| 2482 | struct stat st; /* to get file type and mod time */ |
| 2483 | |
| 2484 | /* open input file with name in, descriptor ind -- set name and mtime */ |
| 2485 | if (path == NULL) { |
| 2486 | strcpy(in, "<stdin>"); |
| 2487 | ind = 0; |
| 2488 | name = NULL; |
| 2489 | mtime = headis & 2 ? |
| 2490 | (fstat(ind, &st) ? time(NULL) : st.st_mtime) : 0; |
| 2491 | len = 0; |
| 2492 | } |
| 2493 | else { |
| 2494 | /* set input file name (already set if recursed here) */ |
| 2495 | if (path != in) { |
| 2496 | strncpy(in, path, sizeof(in)); |
| 2497 | if (in[sizeof(in) - 1]) |
| 2498 | bail("name too long: ", path); |
| 2499 | } |
| 2500 | len = strlen(in); |
| 2501 | |
| 2502 | /* only process regular files, but allow symbolic links if -f, |
| 2503 | recurse into directory if -r */ |
| 2504 | if (lstat(in, &st)) { |
| 2505 | #ifdef EOVERFLOW |
| 2506 | if (errno == EOVERFLOW || errno == EFBIG) |
| 2507 | bail(in, |
| 2508 | " too large -- pigz not compiled with large file support"); |
| 2509 | #endif |
| 2510 | if (verbosity > 0) |
| 2511 | fprintf(stderr, "%s does not exist -- skipping\n", in); |
| 2512 | return; |
| 2513 | } |
| 2514 | if ((st.st_mode & S_IFMT) != S_IFREG && |
| 2515 | (st.st_mode & S_IFMT) != S_IFLNK && |
| 2516 | (st.st_mode & S_IFMT) != S_IFDIR) { |
| 2517 | if (verbosity > 0) |
| 2518 | fprintf(stderr, "%s is a special file or device -- skipping\n", |
| 2519 | in); |
| 2520 | return; |
| 2521 | } |
| 2522 | if ((st.st_mode & S_IFMT) == S_IFLNK && !force && !pipeout) { |
| 2523 | if (verbosity > 0) |
| 2524 | fprintf(stderr, "%s is a symbolic link -- skipping\n", in); |
| 2525 | return; |
| 2526 | } |
| 2527 | if ((st.st_mode & S_IFMT) == S_IFDIR && !recurse) { |
| 2528 | if (verbosity > 0) |
| 2529 | fprintf(stderr, "%s is a directory -- skipping\n", in); |
| 2530 | return; |
| 2531 | } |
| 2532 | |
| 2533 | /* recurse into directory (assumes Unix) */ |
| 2534 | if ((st.st_mode & S_IFMT) == S_IFDIR) { |
| 2535 | char *roll, *item, *cut, *base, *bigger; |
| 2536 | size_t len, hold; |
| 2537 | DIR *here; |
| 2538 | struct dirent *next; |
| 2539 | |
| 2540 | /* accumulate list of entries (need to do this, since readdir() |
| 2541 | behavior not defined if directory modified between calls) */ |
| 2542 | here = opendir(in); |
| 2543 | if (here == NULL) |
| 2544 | return; |
| 2545 | hold = 512; |
| 2546 | roll = malloc(hold); |
| 2547 | if (roll == NULL) |
| 2548 | bail("not enough memory", ""); |
| 2549 | *roll = 0; |
| 2550 | item = roll; |
| 2551 | while ((next = readdir(here)) != NULL) { |
| 2552 | if (next->d_name[0] == 0 || |
| 2553 | (next->d_name[0] == '.' && (next->d_name[1] == 0 || |
| 2554 | (next->d_name[1] == '.' && next->d_name[2] == 0)))) |
| 2555 | continue; |
| 2556 | len = strlen(next->d_name) + 1; |
| 2557 | if (item + len + 1 > roll + hold) { |
| 2558 | do { /* make roll bigger */ |
| 2559 | hold <<= 1; |
| 2560 | } while (item + len + 1 > roll + hold); |
| 2561 | bigger = realloc(roll, hold); |
| 2562 | if (bigger == NULL) { |
| 2563 | free(roll); |
| 2564 | bail("not enough memory", ""); |
| 2565 | } |
| 2566 | item = bigger + (item - roll); |
| 2567 | roll = bigger; |
| 2568 | } |
| 2569 | strcpy(item, next->d_name); |
| 2570 | item += len; |
| 2571 | *item = 0; |
| 2572 | } |
| 2573 | closedir(here); |
| 2574 | |
| 2575 | /* run process() for each entry in the directory */ |
| 2576 | cut = base = in + strlen(in); |
| 2577 | if (base > in && base[-1] != (unsigned char)'/') { |
| 2578 | if ((size_t)(base - in) >= sizeof(in)) |
| 2579 | bail("path too long", in); |
| 2580 | *base++ = '/'; |
| 2581 | } |
| 2582 | item = roll; |
| 2583 | while (*item) { |
| 2584 | strncpy(base, item, sizeof(in) - (base - in)); |
| 2585 | if (in[sizeof(in) - 1]) { |
| 2586 | strcpy(in + (sizeof(in) - 4), "..."); |
| 2587 | bail("path too long: ", in); |
| 2588 | } |
| 2589 | process(in); |
| 2590 | item += strlen(item) + 1; |
| 2591 | } |
| 2592 | *cut = 0; |
| 2593 | |
| 2594 | /* release list of entries */ |
| 2595 | free(roll); |
| 2596 | return; |
| 2597 | } |
| 2598 | |
| 2599 | /* don't compress .gz (or provided suffix) files, unless -f */ |
| 2600 | if (!(force || list || decode) && len >= strlen(sufx) && |
| 2601 | strcmp(in + len - strlen(sufx), sufx) == 0) { |
| 2602 | if (verbosity > 0) |
| 2603 | fprintf(stderr, "%s ends with %s -- skipping\n", in, sufx); |
| 2604 | return; |
| 2605 | } |
| 2606 | |
| 2607 | /* only decompress or list files with compressed suffix */ |
| 2608 | if (list || decode) { |
| 2609 | int suf = compressed_suffix(in); |
| 2610 | if (suf == 0) { |
| 2611 | if (verbosity > 0) |
| 2612 | fprintf(stderr, |
| 2613 | "%s does not have compressed suffix -- skipping\n", |
| 2614 | in); |
| 2615 | return; |
| 2616 | } |
| 2617 | len -= suf; |
| 2618 | } |
| 2619 | |
| 2620 | /* open input file */ |
| 2621 | ind = open(in, O_RDONLY, 0); |
| 2622 | if (ind < 0) |
| 2623 | bail("read error on ", in); |
| 2624 | |
| 2625 | /* prepare gzip header information for compression */ |
| 2626 | name = headis & 1 ? justname(in) : NULL; |
| 2627 | mtime = headis & 2 ? st.st_mtime : 0; |
| 2628 | } |
| 2629 | SET_BINARY_MODE(ind); |
| 2630 | |
| 2631 | /* if decoding or testing, try to read gzip header */ |
| 2632 | hname = NULL; |
| 2633 | if (decode) { |
| 2634 | in_init(); |
| 2635 | method = get_header(1); |
| 2636 | if (method != 8 && method != 256) { |
| 2637 | RELEASE(hname); |
| 2638 | if (ind != 0) |
| 2639 | close(ind); |
| 2640 | if (method != -1 && verbosity > 0) |
| 2641 | fprintf(stderr, |
| 2642 | method < 0 ? "%s is not compressed -- skipping\n" : |
| 2643 | "%s has unknown compression method -- skipping\n", |
| 2644 | in); |
| 2645 | return; |
| 2646 | } |
| 2647 | |
| 2648 | /* if requested, test input file (possibly a special list) */ |
| 2649 | if (decode == 2) { |
| 2650 | if (method == 8) |
| 2651 | infchk(); |
| 2652 | else { |
| 2653 | unlzw(); |
| 2654 | if (list) { |
| 2655 | in_tot -= 3; |
| 2656 | show_info(method, 0, out_tot, 0); |
| 2657 | } |
| 2658 | } |
| 2659 | RELEASE(hname); |
| 2660 | if (ind != 0) |
| 2661 | close(ind); |
| 2662 | return; |
| 2663 | } |
| 2664 | } |
| 2665 | |
| 2666 | /* if requested, just list information about input file */ |
| 2667 | if (list) { |
| 2668 | list_info(); |
| 2669 | RELEASE(hname); |
| 2670 | if (ind != 0) |
| 2671 | close(ind); |
| 2672 | return; |
| 2673 | } |
| 2674 | |
| 2675 | /* create output file out, descriptor outd */ |
| 2676 | if (path == NULL || pipeout) { |
| 2677 | /* write to stdout */ |
| 2678 | out = malloc(strlen("<stdout>") + 1); |
| 2679 | if (out == NULL) |
| 2680 | bail("not enough memory", ""); |
| 2681 | strcpy(out, "<stdout>"); |
| 2682 | outd = 1; |
| 2683 | if (!decode && !force && isatty(outd)) |
| 2684 | bail("trying to write compressed data to a terminal", |
| 2685 | " (use -f to force)"); |
| 2686 | } |
| 2687 | else { |
| 2688 | char *to; |
| 2689 | |
| 2690 | /* use header name for output when decompressing with -N */ |
| 2691 | to = in; |
| 2692 | if (decode && (headis & 1) != 0 && hname != NULL) { |
| 2693 | to = hname; |
| 2694 | len = strlen(hname); |
| 2695 | } |
| 2696 | |
| 2697 | /* create output file and open to write */ |
| 2698 | out = malloc(len + (decode ? 0 : strlen(sufx)) + 1); |
| 2699 | if (out == NULL) |
| 2700 | bail("not enough memory", ""); |
| 2701 | memcpy(out, to, len); |
| 2702 | strcpy(out + len, decode ? "" : sufx); |
| 2703 | outd = open(out, O_CREAT | O_TRUNC | O_WRONLY | |
| 2704 | (force ? 0 : O_EXCL), 0666<
|