zip.h #ifndef _zip_H #define _zip_H // #define ZIP_STD #ifdef ZIP_STD #include <time.h> #define DECLARE_HANDLE(name) struct name##__ { int unused; }; typedef struct name##__ *name #ifndef MAX_PATH #define MAX_PATH 1024 #endif typedef unsigned long LDWORD; typedef char TCHAR; typedef FILE* HANDLE; typedef time_t FILETIME; #endif
// ZIP functions -- for creating zip files // This file is a repackaged form of the Info-Zip source code available // at www.info-zip.org. The original copyright notice may be found in // zip.cpp. The repackaging was done by Lucian Wischik to simplify and // extend its use in Windows/C++. Also to add encryption and unicode. #ifndef _unzip_H DECLARE_HANDLE(HZIP); #endif // An HZIP identifies a zip file that is being created
typedef LDWORD ZRESULT; // return codes from any of the zip functions. Listed later.
HZIP CreateZip(const TCHAR *fn, const char *password); HZIP CreateZip(void *buf,unsigned int len, const char *password); HZIP CreateZipHandle(HANDLE h, const char *password); // CreateZip - call this to start the creation of a zip file. // As the zip is being created, it will be stored somewhere: // to a pipe: CreateZipHandle(hpipe_write); // in a file (by handle): CreateZipHandle(hfile); // in a file (by name): CreateZip("c:\\test.zip"); // in memory: CreateZip(buf, len); // or in pagefile memory: CreateZip(0, len); // The final case stores it in memory backed by the system paging file, // where the zip may not exceed len bytes. This is a bit friendlier than // allocating memory with new[]: it won't lead to fragmentation, and the // memory won't be touched unless needed. That means you can give very // large estimates of the maximum-size without too much worry. // As for the password, it lets you encrypt every file in the archive. // (This api doesn't support per-file encryption.) // Note: because pipes don't allow random access, the structure of a zipfile // created into a pipe is slightly different from that created into a file // or memory. In particular, the compressed-size of the item cannot be // stored in the zipfile until after the item itself. (Also, for an item added // itself via a pipe, the uncompressed-size might not either be known until // after.) This is not normally a problem. But if you try to unzip via a pipe // as well, then the unzipper will not know these things about the item until // after it has been unzipped. Therefore: for unzippers which don't just write // each item to disk or to a pipe, but instead pre-allocate memory space into // which to unzip them, then either you have to create the zip not to a pipe, // or you have to add items not from a pipe, or at least when adding items // from a pipe you have to specify the length. // Note: for windows-ce, you cannot close the handle until after CloseZip. // but for real windows, the zip makes its own copy of your handle, so you // can close yours anytime. ZRESULT ZipAdd(HZIP hz,const TCHAR *dstzn, const TCHAR *fn); ZRESULT ZipAdd(HZIP hz,const TCHAR *dstzn, void *src,unsigned int len); ZRESULT ZipAddHandle(HZIP hz,const TCHAR *dstzn, HANDLE h); ZRESULT ZipAddHandle(HZIP hz,const TCHAR *dstzn, HANDLE h, unsigned int len); ZRESULT ZipAddFolder(HZIP hz,const TCHAR *dstzn); // ZipAdd - call this for each file to be added to the zip. // dstzn is the name that the file will be stored as in the zip file. // The file to be added to the zip can come // from a pipe: ZipAddHandle(hz,"file.dat", hpipe_read); // from a file: ZipAddHandle(hz,"file.dat", hfile); // from a filen: ZipAdd(hz,"file.dat", "c:\\docs\\origfile.dat"); // from memory: ZipAdd(hz,"subdir\\file.dat", buf,len); // (folder): ZipAddFolder(hz,"subdir"); // Note: if adding an item from a pipe, and if also creating the zip file itself // to a pipe, then you might wish to pass a non-zero length to the ZipAddHandle // function. This will let the zipfile store the item's size ahead of the // compressed item itself, which in turn makes it easier when unzipping the // zipfile from a pipe.
ZRESULT ZipGetMemory(HZIP hz, void **buf, unsigned long *len); // ZipGetMemory - If the zip was created in memory, via ZipCreate(0,len), // then this function will return information about that memory block. // buf will receive a pointer to its start, and len its length. // Note: you can't add any more after calling this. ZRESULT CloseZip(HZIP hz); // CloseZip - the zip handle must be closed with this function. unsigned int FormatZipMessage(ZRESULT code, TCHAR *buf,unsigned int len); // FormatZipMessage - given an error code, formats it as a string. // It returns the length of the error message. If buf/len points // to a real buffer, then it also writes as much as possible into there.
// These are the result codes: #define ZR_OK 0x00000000 // nb. the pseudo-code zr-recent is never returned, #define ZR_RECENT 0x00000001 // but can be passed to FormatZipMessage. // The following come from general system stuff (e.g. files not openable) #define ZR_GENMASK 0x0000FF00 #define ZR_NODUPH 0x00000100 // couldn't duplicate the handle #define ZR_NOFILE 0x00000200 // couldn't create/open the file #define ZR_NOALLOC 0x00000300 // failed to allocate some resource #define ZR_WRITE 0x00000400 // a general error writing to the file #define ZR_NOTFOUND 0x00000500 // couldn't find that file in the zip #define ZR_MORE 0x00000600 // there's still more data to be unzipped #define ZR_CORRUPT 0x00000700 // the zipfile is corrupt or not a zipfile #define ZR_READ 0x00000800 // a general error reading the file // The following come from mistakes on the part of the caller #define ZR_CALLERMASK 0x00FF0000 #define ZR_ARGS 0x00010000 // general mistake with the arguments #define ZR_NOTMMAP 0x00020000 // tried to ZipGetMemory, but that only works on mmap zipfiles, which yours wasn't #define ZR_MEMSIZE 0x00030000 // the memory size is too small #define ZR_FAILED 0x00040000 // the thing was already failed when you called this function #define ZR_ENDED 0x00050000 // the zip creation has already been closed #define ZR_MISSIZE 0x00060000 // the indicated input file size turned out mistaken #define ZR_PARTIALUNZ 0x00070000 // the file had already been partially unzipped #define ZR_ZMODE 0x00080000 // tried to mix creating/opening a zip // The following come from bugs within the zip library itself #define ZR_BUGMASK 0xFF000000 #define ZR_NOTINITED 0x01000000 // initialisation didn't work #define ZR_SEEK 0x02000000 // trying to seek in an unseekable file #define ZR_NOCHANGE 0x04000000 // changed its mind on storage, but not allowed #define ZR_FLATE 0x05000000 // an internal error in the de/inflation code
// e.g. // // (1) Traditional use, creating a zipfile from existing files // HZIP hz = CreateZip("c:\\simple1.zip",0); // ZipAdd(hz,"znsimple.bmp", "c:\\simple.bmp"); // ZipAdd(hz,"znsimple.txt", "c:\\simple.txt"); // CloseZip(hz); // // (2) Memory use, creating an auto-allocated mem-based zip file from various sources // HZIP hz = CreateZip(0,100000, 0); // // adding a conventional file... // ZipAdd(hz,"src1.txt", "c:\\src1.txt"); // // adding something from memory... // char buf[1000]; for (int i=0; i<1000; i++) buf[i]=(char)(i&0x7F); // ZipAdd(hz,"file.dat", buf,1000); // // adding something from a pipe... // HANDLE hread,hwrite; CreatePipe(&hread,&hwrite,NULL,0); // HANDLE hthread = CreateThread(0,0,ThreadFunc,(void*)hwrite,0,0); // ZipAdd(hz,"unz3.dat", hread,1000); // the '1000' is optional. // WaitForSingleObject(hthread,INFINITE); // CloseHandle(hthread); CloseHandle(hread); // ... meanwhile LDWORD WINAPI ThreadFunc(void *dat) // { HANDLE hwrite = (HANDLE)dat; // char buf[1000]={17}; // LDWORD writ; WriteFile(hwrite,buf,1000,&writ,NULL); // CloseHandle(hwrite); // return 0; // } // // and now that the zip is created, let's do something with it: // void *zbuf; unsigned long zlen; ZipGetMemory(hz,&zbuf,&zlen); // HANDLE hfz = CreateFile("test2.zip",GENERIC_WRITE,0,0,CREATE_ALWAYS,FILE_ATTRIBUTE_NORMAL,0); // LDWORD writ; WriteFile(hfz,zbuf,zlen,&writ,NULL); // CloseHandle(hfz); // CloseZip(hz); // // (3) Handle use, for file handles and pipes // HANDLE hzread,hzwrite; CreatePipe(&hzread,&hzwrite,0,0); // HANDLE hthread = CreateThread(0,0,ZipReceiverThread,(void*)hzread,0,0); // HZIP hz = CreateZipHandle(hzwrite,0); // // ... add to it // CloseZip(hz); // CloseHandle(hzwrite); // WaitForSingleObject(hthread,INFINITE); // CloseHandle(hthread); // ... meanwhile LDWORD WINAPI ZipReceiverThread(void *dat) // { HANDLE hread = (HANDLE)dat; // char buf[1000]; // while (true) // { LDWORD red; ReadFile(hread,buf,1000,&red,NULL); // // ... and do something with this zip data we're receiving // if (red==0) break; // } // CloseHandle(hread); // return 0; // }
// Now we indulge in a little skullduggery so that the code works whether // the user has included just zip or both zip and unzip. // Idea: if header files for both zip and unzip are present, then presumably // the cpp files for zip and unzip are both present, so we will call // one or the other of them based on a dynamic choice. If the header file // for only one is present, then we will bind to that particular one. ZRESULT CloseZipZ(HZIP hz); unsigned int FormatZipMessageZ(ZRESULT code, char *buf,unsigned int len); bool IsZipHandleZ(HZIP hz); #ifdef _unzip_H #undef CloseZip #define CloseZip(hz) (IsZipHandleZ(hz)?CloseZipZ(hz):CloseZipU(hz)) #else #define CloseZip CloseZipZ #define FormatZipMessage FormatZipMessageZ #endif
#endif 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 zip.cpp #define ZIP_STD #ifdef ZIP_STD #include <stdio.h> #include <stdlib.h> #include <stdarg.h> #include <time.h> #include <sys/types.h> #include <sys/stat.h> #include <memory.h> #include <string.h> #include <ctype.h> #include "zip.h" // typedef unsigned short WORD; #define _tcslen strlen #define _tcsicmp stricmp #define _tcsncpy strncpy #define _tcsstr strstr #define INVALID_HANDLE_VALUE 0 #ifndef _T #define _T(s) s #endif #ifndef S_IWUSR #define S_IWUSR 0000200 #define S_ISDIR(m) (((m) & _S_IFMT) == _S_IFDIR) #define S_ISREG(m) (((m) & _S_IFMT) == _S_IFREG) #endif // #else #include <windows.h> #include <tchar.h> #include <ctype.h> #include <stdio.h> #include "zip.h" #endif // THIS FILE is almost entirely based upon code by info-zip. // It has been modified by Lucian Wischik. The modifications // were a complete rewrite of the bit of code that generates the // layout of the zipfile, and support for zipping to/from memory // or handles or pipes or pagefile or diskfiles, encryption, unicode. // The original code may be found at http://www.info-zip.org // The original copyright text follows. // // // // This is version 1999-Oct-05 of the Info-ZIP copyright and license. // The definitive version of this document should be available at // ftp://ftp.cdrom.com/pub/infozip/license.html indefinitely. // // Copyright (c) 1990-1999 Info-ZIP. All rights reserved. // // For the purposes of this copyright and license, "Info-ZIP" is defined as // the following set of individuals: // // Mark Adler, John Bush, Karl Davis, Harald Denker, Jean-Michel Dubois, // Jean-loup Gailly, Hunter Goatley, Ian Gorman, Chris Herborth, Dirk Haase, // Greg Hartwig, Robert Heath, Jonathan Hudson, Paul Kienitz, David Kirschbaum, // Johnny Lee, Onno van der Linden, Igor Mandrichenko, Steve P. Miller, // Sergio Monesi, Keith Owens, George Petrov, Greg Roelofs, Kai Uwe Rommel, // Steve Salisbury, Dave Smith, Christian Spieler, Antoine Verheijen, // Paul von Behren, Rich Wales, Mike White // // This software is provided "as is," without warranty of any kind, express // or implied. In no event shall Info-ZIP or its contributors be held liable // for any direct, indirect, incidental, special or consequential damages // arising out of the use of or inability to use this software. // // Permission is granted to anyone to use this software for any purpose, // including commercial applications, and to alter it and redistribute it // freely, subject to the following restrictions: // // 1. Redistributions of source code must retain the above copyright notice, // definition, disclaimer, and this list of conditions. // // 2. Redistributions in binary form must reproduce the above copyright // notice, definition, disclaimer, and this list of conditions in // documentation and/or other materials provided with the distribution. // // 3. Altered versions--including, but not limited to, ports to new operating // systems, existing ports with new graphical interfaces, and dynamic, // shared, or static library versions--must be plainly marked as such // and must not be misrepresented as being the original source. Such // altered versions also must not be misrepresented as being Info-ZIP // releases--including, but not limited to, labeling of the altered // versions with the names "Info-ZIP" (or any variation thereof, including, // but not limited to, different capitalizations), "Pocket UnZip," "WiZ" // or "MacZip" without the explicit permission of Info-ZIP. Such altered // versions are further prohibited from misrepresentative use of the // Zip-Bugs or Info-ZIP e-mail addresses or of the Info-ZIP URL(s). // // 4. Info-ZIP retains the right to use the names "Info-ZIP," "Zip," "UnZip," // "WiZ," "Pocket UnZip," "Pocket Zip," and "MacZip" for its own source and // binary releases. //
typedef unsigned char uch; // unsigned 8-bit value typedef unsigned short ush; // unsigned 16-bit value typedef unsigned long ulg; // unsigned 32-bit value typedef size_t extent; // file size typedef unsigned Pos; // must be at least 32 bits typedef unsigned IPos; // A Pos is an index in the character window. Pos is used only for parameter passing
#ifndef EOF #define EOF (-1) #endif
// Error return values. The values 0..4 and 12..18 follow the conventions // of PKZIP. The values 4..10 are all assigned to "insufficient memory" // by PKZIP, so the codes 5..10 are used here for other purposes. #define ZE_MISS -1 // used by procname(), zipbare() #define ZE_OK 0 // success #define ZE_EOF 2 // unexpected end of zip file #define ZE_FORM 3 // zip file structure error #define ZE_MEM 4 // out of memory #define ZE_LOGIC 5 // internal logic error #define ZE_BIG 6 // entry too large to split #define ZE_NOTE 7 // invalid comment format #define ZE_TEST 8 // zip test (-T) failed or out of memory #define ZE_ABORT 9 // user interrupt or termination #define ZE_TEMP 10 // error using a temp file #define ZE_READ 11 // read or seek error #define ZE_NONE 12 // nothing to do #define ZE_NAME 13 // missing or empty zip file #define ZE_WRITE 14 // error writing to a file #define ZE_CREAT 15 // couldn't open to write #define ZE_PARMS 16 // bad command line #define ZE_OPEN 18 // could not open a specified file to read #define ZE_MAXERR 18 // the highest error number
// internal file attribute #define UNKNOWN (-1) #define BINARY 0 #define ASCII 1
#define BEST -1 // Use best method (deflation or store) #define STORE 0 // Store method #define DEFLATE 8 // Deflation method #define CRCVAL_INITIAL 0L // MSDOS file or directory attributes #define MSDOS_HIDDEN_ATTR 0x02 #define MSDOS_DIR_ATTR 0x10 // Lengths of headers after signatures in bytes #define LOCHEAD 26 #define CENHEAD 42 #define ENDHEAD 18 // Definitions for extra field handling: #define EB_HEADSIZE 4 /* length of a extra field block header */ #define EB_LEN 2 /* offset of data length field in header */ #define EB_UT_MINLEN 1 /* minimal UT field contains Flags byte */ #define EB_UT_FLAGS 0 /* byte offset of Flags field */ #define EB_UT_TIME1 1 /* byte offset of 1st time value */ #define EB_UT_FL_MTIME (1 << 0) /* mtime present */ #define EB_UT_FL_ATIME (1 << 1) /* atime present */ #define EB_UT_FL_CTIME (1 << 2) /* ctime present */ #define EB_UT_LEN(n) (EB_UT_MINLEN + 4 * (n)) #define EB_L_UT_SIZE (EB_HEADSIZE + EB_UT_LEN(3)) #define EB_C_UT_SIZE (EB_HEADSIZE + EB_UT_LEN(1)) // Macros for writing machine integers to little-endian format #define PUTSH(a,f) {char _putsh_c=(char)((a)&0xff); wfunc(param,&_putsh_c,1); _putsh_c=(char)((a)>>8); wfunc(param,&_putsh_c,1);} #define PUTLG(a,f) {PUTSH((a) & 0xffff,(f)) PUTSH((a) >> 16,(f))}
// -- Structure of a ZIP file -- // Signatures for zip file information headers #define LOCSIG 0x04034b50L #define CENSIG 0x02014b50L #define ENDSIG 0x06054b50L #define EXTLOCSIG 0x08074b50L
#define MIN_MATCH 3 #define MAX_MATCH 258 // The minimum and maximum match lengths
#define WSIZE (0x8000) // Maximum window size = 32K. If you are really short of memory, compile // with a smaller WSIZE but this reduces the compression ratio for files // of size > WSIZE. WSIZE must be a power of two in the current implementation. //
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) // Minimum amount of lookahead, except at the end of the input file. // See deflate.c for comments about the MIN_MATCH+1. // #define MAX_DIST (WSIZE-MIN_LOOKAHEAD) // In order to simplify the code, particularly on 16 bit machines, match // distances are limited to MAX_DIST instead of WSIZE. // #define ZIP_HANDLE 1 #define ZIP_FILENAME 2 #define ZIP_MEMORY 3 #define ZIP_FOLDER 4
// =========================================================================== // Constants // #define MAX_BITS 15 // All codes must not exceed MAX_BITS bits #define MAX_BL_BITS 7 // Bit length codes must not exceed MAX_BL_BITS bits #define LENGTH_CODES 29 // number of length codes, not counting the special END_BLOCK code #define LITERALS 256 // number of literal bytes 0..255 #define END_BLOCK 256 // end of block literal code #define L_CODES (LITERALS+1+LENGTH_CODES) // number of Literal or Length codes, including the END_BLOCK code #define D_CODES 30 // number of distance codes #define BL_CODES 19 // number of codes used to transfer the bit lengths #define STORED_BLOCK 0 #define STATIC_TREES 1 #define DYN_TREES 2 // The three kinds of block type
#define LIT_BUFSIZE 0x8000 #define DIST_BUFSIZE LIT_BUFSIZE // Sizes of match buffers for literals/lengths and distances. There are // 4 reasons for limiting LIT_BUFSIZE to 64K: // - frequencies can be kept in 16 bit counters // - if compression is not successful for the first block, all input data is // still in the window so we can still emit a stored block even when input // comes from standard input. (This can also be done for all blocks if // LIT_BUFSIZE is not greater than 32K.) // - if compression is not successful for a file smaller than 64K, we can // even emit a stored file instead of a stored block (saving 5 bytes). // - creating new Huffman trees less frequently may not provide fast // adaptation to changes in the input data statistics. (Take for // example a binary file with poorly compressible code followed by // a highly compressible string table.) Smaller buffer sizes give // fast adaptation but have of course the overhead of transmitting trees // more frequently. // - I can't count above 4 // The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save // memory at the expense of compression). Some optimizations would be possible // if we rely on DIST_BUFSIZE == LIT_BUFSIZE. // #define REP_3_6 16 // repeat previous bit length 3-6 times (2 bits of repeat count) #define REPZ_3_10 17 // repeat a zero length 3-10 times (3 bits of repeat count) #define REPZ_11_138 18 // repeat a zero length 11-138 times (7 bits of repeat count) #define HEAP_SIZE (2*L_CODES+1) // maximum heap size // =========================================================================== // Local data used by the "bit string" routines. //
#define Buf_size (8 * 2*sizeof(char)) // Number of bits used within bi_buf. (bi_buf may be implemented on // more than 16 bits on some systems.) // Output a 16 bit value to the bit stream, lower (oldest) byte first #define PUTSHORT(state,w) \ { if (state.bs.out_offset >= state.bs.out_size-1) \ state.flush_outbuf(state.param,state.bs.out_buf, &state.bs.out_offset); \ state.bs.out_buf[state.bs.out_offset++] = (char) ((w) & 0xff); \ state.bs.out_buf[state.bs.out_offset++] = (char) ((ush)(w) >> 8); \ } #define PUTBYTE(state,b) \ { if (state.bs.out_offset >= state.bs.out_size) \ state.flush_outbuf(state.param,state.bs.out_buf, &state.bs.out_offset); \ state.bs.out_buf[state.bs.out_offset++] = (char) (b); \ } // DEFLATE.CPP HEADER #define HASH_BITS 15 // For portability to 16 bit machines, do not use values above 15. #define HASH_SIZE (unsigned)(1<<HASH_BITS) #define HASH_MASK (HASH_SIZE-1) #define WMASK (WSIZE-1) // HASH_SIZE and WSIZE must be powers of two #define NIL 0 // Tail of hash chains #define FAST 4 #define SLOW 2 // speed options for the general purpose bit flag #define TOO_FAR 4096 // Matches of length 3 are discarded if their distance exceeds TOO_FAR
#define EQUAL 0 // result of memcmp for equal strings // =========================================================================== // Local data used by the "longest match" routines.
#define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH) // Number of bits by which ins_h and del_h must be shifted at each // input step. It must be such that after MIN_MATCH steps, the oldest // byte no longer takes part in the hash key, that is: // H_SHIFT * MIN_MATCH >= HASH_BITS #define max_insert_length max_lazy_match // Insert new strings in the hash table only if the match length // is not greater than this length. This saves time but degrades compression. // max_insert_length is used only for compression levels <= 3.
const int extra_lbits[LENGTH_CODES] // extra bits for each length code = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; const int extra_dbits[D_CODES] // extra bits for each distance code = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; const int extra_blbits[BL_CODES]// extra bits for each bit length code = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; const uch bl_order[BL_CODES] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; // The lengths of the bit length codes are sent in order of decreasing // probability, to avoid transmitting the lengths for unused bit length codes. typedef struct config { ush good_length; // reduce lazy search above this match length ush max_lazy; // do not perform lazy search above this match length ush nice_length; // quit search above this match length ush max_chain; } config;
// Values for max_lazy_match, good_match, nice_match and max_chain_length, // depending on the desired pack level (0..9). The values given below have // been tuned to exclude worst case performance for pathological files. // Better values may be found for specific files. // const config configuration_table[10] = { // good lazy nice chain {0, 0, 0, 0}, // 0 store only {4, 4, 8, 4}, // 1 maximum speed, no lazy matches {4, 5, 16, 8}, // 2 {4, 6, 32, 32}, // 3 {4, 4, 16, 16}, // 4 lazy matches */ {8, 16, 32, 32}, // 5 {8, 16, 128, 128}, // 6 {8, 32, 128, 256}, // 7 {32, 128, 258, 1024}, // 8 {32, 258, 258, 4096}};// 9 maximum compression */ // Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 // For deflate_fast() (levels <= 3) good is ignored and lazy has a different meaning.
// Data structure describing a single value and its code string. typedef struct ct_data { union { ush freq; // frequency count ush code; // bit string } fc; union { ush dad; // father node in Huffman tree ush len; // length of bit string } dl; } ct_data; typedef struct tree_desc { ct_data *dyn_tree; // the dynamic tree ct_data *static_tree; // corresponding static tree or NULL const int *extra_bits; // extra bits for each code or NULL int extra_base; // base index for extra_bits int elems; // max number of elements in the tree int max_length; // max bit length for the codes int max_code; // largest code with non zero frequency } tree_desc;
class TTreeState { public: TTreeState();
ct_data dyn_ltree[HEAP_SIZE]; // literal and length tree ct_data dyn_dtree[2*D_CODES+1]; // distance tree ct_data static_ltree[L_CODES+2]; // the static literal tree... // ... Since the bit lengths are imposed, there is no need for the L_CODES // extra codes used during heap construction. However the codes 286 and 287 // are needed to build a canonical tree (see ct_init below). ct_data static_dtree[D_CODES]; // the static distance tree... // ... (Actually a trivial tree since all codes use 5 bits.) ct_data bl_tree[2*BL_CODES+1]; // Huffman tree for the bit lengths tree_desc l_desc; tree_desc d_desc; tree_desc bl_desc; ush bl_count[MAX_BITS+1]; // number of codes at each bit length for an optimal tree int heap[2*L_CODES+1]; // heap used to build the Huffman trees int heap_len; // number of elements in the heap int heap_max; // element of largest frequency // The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. // The same heap array is used to build all trees. uch depth[2*L_CODES+1]; // Depth of each subtree used as tie breaker for trees of equal frequency uch length_code[MAX_MATCH-MIN_MATCH+1]; // length code for each normalized match length (0 == MIN_MATCH) uch dist_code[512]; // distance codes. The first 256 values correspond to the distances // 3 .. 258, the last 256 values correspond to the top 8 bits of // the 15 bit distances. int base_length[LENGTH_CODES]; // First normalized length for each code (0 = MIN_MATCH) int base_dist[D_CODES]; // First normalized distance for each code (0 = distance of 1) uch l_buf[LIT_BUFSIZE]; // buffer for literals/lengths ush d_buf[DIST_BUFSIZE]; // buffer for distances uch flag_buf[(LIT_BUFSIZE/8)]; // flag_buf is a bit array distinguishing literals from lengths in // l_buf, and thus indicating the presence or absence of a distance. unsigned last_lit; // running index in l_buf unsigned last_dist; // running index in d_buf unsigned last_flags; // running index in flag_buf uch flags; // current flags not yet saved in flag_buf uch flag_bit; // current bit used in flags // bits are filled in flags starting at bit 0 (least significant). // Note: these flags are overkill in the current code since we don't // take advantage of DIST_BUFSIZE == LIT_BUFSIZE. ulg opt_len; // bit length of current block with optimal trees ulg static_len; // bit length of current block with static trees ulg cmpr_bytelen; // total byte length of compressed file ulg cmpr_len_bits; // number of bits past 'cmpr_bytelen' ulg input_len; // total byte length of input file // input_len is for debugging only since we can get it by other means. ush *file_type; // pointer to UNKNOWN, BINARY or ASCII // int *file_method; // pointer to DEFLATE or STORE }; TTreeState::TTreeState() { tree_desc a = {dyn_ltree, static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS, 0}; l_desc = a; tree_desc b = {dyn_dtree, static_dtree, extra_dbits, 0, D_CODES, MAX_BITS, 0}; d_desc = b; tree_desc c = {bl_tree, NULL, extra_blbits, 0, BL_CODES, MAX_BL_BITS, 0}; bl_desc = c; last_lit=0; last_dist=0; last_flags=0; }
class TBitState { public: int flush_flg; // unsigned bi_buf; // Output buffer. bits are inserted starting at the bottom (least significant // bits). The width of bi_buf must be at least 16 bits. int bi_valid; // Number of valid bits in bi_buf. All bits above the last valid bit // are always zero. char *out_buf; // Current output buffer. unsigned out_offset; // Current offset in output buffer. // On 16 bit machines, the buffer is limited to 64K. unsigned out_size; // Size of current output buffer ulg bits_sent; // bit length of the compressed data only needed for debugging??? };
class TDeflateState { public: TDeflateState() {window_size=0;} uch window[2L*WSIZE]; // Sliding window. Input bytes are read into the second half of the window, // and move to the first half later to keep a dictionary of at least WSIZE // bytes. With this organization, matches are limited to a distance of // WSIZE-MAX_MATCH bytes, but this ensures that IO is always // performed with a length multiple of the block size. Also, it limits // the window size to 64K, which is quite useful on MSDOS. // To do: limit the window size to WSIZE+CBSZ if SMALL_MEM (the code would // be less efficient since the data would have to be copied WSIZE/CBSZ times) Pos prev[WSIZE]; // Link to older string with same hash index. To limit the size of this // array to 64K, this link is maintained only for the last 32K strings. // An index in this array is thus a window index modulo 32K. Pos head[HASH_SIZE]; // Heads of the hash chains or NIL. If your compiler thinks that // HASH_SIZE is a dynamic value, recompile with -DDYN_ALLOC. ulg window_size; // window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the // input file length plus MIN_LOOKAHEAD. long block_start; // window position at the beginning of the current output block. Gets // negative when the window is moved backwards. int sliding; // Set to false when the input file is already in memory unsigned ins_h; // hash index of string to be inserted unsigned int prev_length; // Length of the best match at previous step. Matches not greater than this // are discarded. This is used in the lazy match evaluation. unsigned strstart; // start of string to insert unsigned match_start; // start of matching string int eofile; // flag set at end of input file unsigned lookahead; // number of valid bytes ahead in window unsigned max_chain_length; // To speed up deflation, hash chains are never searched beyond this length. // A higher limit improves compression ratio but degrades the speed. unsigned int max_lazy_match; // Attempt to find a better match only when the current match is strictly // smaller than this value. This mechanism is used only for compression // levels >= 4. unsigned good_match; // Use a faster search when the previous match is longer than this int nice_match; // Stop searching when current match exceeds this }; typedef long lutime_t; // define it ourselves since we don't include time.h typedef struct iztimes { lutime_t atime,mtime,ctime; } iztimes; // access, modify, create times typedef struct zlist { ush vem, ver, flg, how; // See central header in zipfile.c for what vem..off are ulg tim, crc, siz, len; extent nam, ext, cext, com; // offset of ext must be >= LOCHEAD ush dsk, att, lflg; // offset of lflg must be >= LOCHEAD ulg atx, off; char name[MAX_PATH]; // File name in zip file char *extra; // Extra field (set only if ext != 0) char *cextra; // Extra in central (set only if cext != 0) char *comment; // Comment (set only if com != 0) char iname[MAX_PATH]; // Internal file name after cleanup char zname[MAX_PATH]; // External version of internal name int mark; // Marker for files to operate on int trash; // Marker for files to delete int dosflag; // Set to force MSDOS file attributes struct zlist *nxt; // Pointer to next header in list } TZipFileInfo; struct TState; typedef unsigned (*READFUNC)(TState &state, char *buf,unsigned size); typedef unsigned (*FLUSHFUNC)(void *param, const char *buf, unsigned *size); typedef unsigned (*WRITEFUNC)(void *param, const char *buf, unsigned size); struct TState { void *param; int level; bool seekable; READFUNC readfunc; FLUSHFUNC flush_outbuf; TTreeState ts; TBitState bs; TDeflateState ds; const char *err; };
// ---------------------------------------------------------------------- // some windows<->linux portability things #ifdef ZIP_STD void filetime2dosdatetime(const FILETIME ft, WORD *dosdate, WORD *dostime) { struct tm *st=gmtime(&ft); *dosdate = (ush)(((st->tm_year+1900 -1980)&0x7f) << 9); *dosdate |= (ush)((st->tm_mon&0xf) << 5); *dosdate |= (ush)((st->tm_mday&0x1f)); *dostime = (ush)((st->tm_hour&0x1f) << 11); *dostime |= (ush)((st->tm_min&0x3f) << 5); *dostime |= (ush)((st->tm_sec*2)&0x1f); }
void GetNow(lutime_t *ft, WORD *dosdate, WORD *dostime) { time_t tm = time(0); filetime2dosdatetime(tm,dosdate,dostime); *ft = (lutime_t)tm; } LDWORD GetFilePosZ(HANDLE hfout) { struct stat st; fstat(fileno(hfout),&st); if ((st.st_mode&S_IFREG)==0) return 0xFFFFFFFF; return ftell(hfout); } ZRESULT GetFileInfo(FILE *hf, ulg *attr, long *size, iztimes *times, ulg *timestamp) { // The handle must be a handle to a file // The date and time is returned in a long with the date most significant to allow // unsigned integer comparison of absolute times. The attributes have two // high bytes unix attr, and two low bytes a mapping of that to DOS attr. struct stat bhi; int res=fstat(fileno(hf),&bhi); if (res==-1) return ZR_NOFILE; ulg fa=bhi.st_mode; ulg a=0; // Zip uses the lower word for its interpretation of windows stuff if ((fa&S_IWUSR)==0) a|=0x01; if (S_ISDIR(fa)) a|=0x10; // It uses the upper word for standard unix attr a |= ((fa&0xFFFF)<<16); // if (attr!=NULL) *attr = a; if (size!=NULL) *size = bhi.st_size; if (times!=NULL) { times->atime = (lutime_t)bhi.st_atime; times->mtime = (lutime_t)bhi.st_mtime; times->ctime = (lutime_t)bhi.st_ctime; } if (timestamp!=NULL) { ush dosdate,dostime; filetime2dosdatetime(bhi.st_mtime,&dosdate,&dostime); *timestamp = (ush)dostime | (((ulg)dosdate)<<16); } return ZR_OK; } // ---------------------------------------------------------------------- #else void filetime2dosdatetime(const FILETIME ft, WORD *dosdate,WORD *dostime) { // date: bits 0-4 are day of month 1-31. Bits 5-8 are month 1..12. Bits 9-15 are year-1980 // time: bits 0-4 are seconds/2, bits 5-10 are minute 0..59. Bits 11-15 are hour 0..23 SYSTEMTIME st; FileTimeToSystemTime(&ft,&st); *dosdate = (WORD)(((st.wYear-1980)&0x7f) << 9); *dosdate |= (WORD)((st.wMonth&0xf) << 5); *dosdate |= (WORD)((st.wDay&0x1f)); *dostime = (WORD)((st.wHour&0x1f) << 11); *dostime |= (WORD)((st.wMinute&0x3f) << 5); *dostime |= (WORD)((st.wSecond*2)&0x1f); }
lutime_t filetime2timet(const FILETIME ft) { LONGLONG i = *(LONGLONG*)&ft; return (lutime_t)((i-116444736000000000LL)/10000000LL); } void GetNow(lutime_t *pft, WORD *dosdate, WORD *dostime) { SYSTEMTIME st; GetLocalTime(&st); FILETIME ft; SystemTimeToFileTime(&st,&ft); filetime2dosdatetime(ft,dosdate,dostime); *pft = filetime2timet(ft); } LDWORD GetFilePosZ(HANDLE hfout) { return SetFilePointer(hfout,0,0,FILE_CURRENT); } ZRESULT GetFileInfo(HANDLE hf, ulg *attr, long *size, iztimes *times, ulg *timestamp) { // The handle must be a handle to a file // The date and time is returned in a long with the date most significant to allow // unsigned integer comparison of absolute times. The attributes have two // high bytes unix attr, and two low bytes a mapping of that to DOS attr. //struct stat s; int res=stat(fn,&s); if (res!=0) return false; // translate windows file attributes into zip ones. BY_HANDLE_FILE_INFORMATION bhi; BOOL res=GetFileInformationByHandle(hf,&bhi); if (!res) return ZR_NOFILE; LDWORD fa=bhi.dwFileAttributes; ulg a=0; // Zip uses the lower word for its interpretation of windows stuff if (fa&FILE_ATTRIBUTE_READONLY) a|=0x01; if (fa&FILE_ATTRIBUTE_HIDDEN) a|=0x02; if (fa&FILE_ATTRIBUTE_SYSTEM) a|=0x04; if (fa&FILE_ATTRIBUTE_DIRECTORY)a|=0x10; if (fa&FILE_ATTRIBUTE_ARCHIVE) a|=0x20; // It uses the upper word for standard unix attr, which we manually construct if (fa&FILE_ATTRIBUTE_DIRECTORY)a|=0x40000000; // directory else a|=0x80000000; // normal file a|=0x01000000; // readable if (fa&FILE_ATTRIBUTE_READONLY) {} else a|=0x00800000; // writeable // now just a small heuristic to check if it's an executable: LDWORD red, hsize=GetFileSize(hf,NULL); if (hsize>40) { SetFilePointer(hf,0,NULL,FILE_BEGIN); unsigned short magic; ReadFile(hf,&magic,sizeof(magic),&red,NULL); SetFilePointer(hf,36,NULL,FILE_BEGIN); unsigned long hpos; ReadFile(hf,&hpos,sizeof(hpos),&red,NULL); if (magic==0x54AD && hsize>hpos+4+20+28) { SetFilePointer(hf,hpos,NULL,FILE_BEGIN); unsigned long signature; ReadFile(hf,&signature,sizeof(signature),&red,NULL); if (signature==IMAGE_DOS_SIGNATURE || signature==IMAGE_OS2_SIGNATURE || signature==IMAGE_OS2_SIGNATURE_LE || signature==IMAGE_NT_SIGNATURE) { a |= 0x00400000; // executable } } } // if (attr!=NULL) *attr = a; if (size!=NULL) *size = hsize; if (times!=NULL) { // lutime_t is 32bit number of seconds elapsed since 0:0:0GMT, Jan1, 1970. // but FILETIME is 64bit number of 100-nanosecs since Jan1, 1601 times->atime = filetime2timet(bhi.ftLastAccessTime); times->mtime = filetime2timet(bhi.ftLastWriteTime); times->ctime = filetime2timet(bhi.ftCreationTime); } if (timestamp!=NULL) { WORD dosdate,dostime; filetime2dosdatetime(bhi.ftLastWriteTime,&dosdate,&dostime); *timestamp = (WORD)dostime | (((LDWORD)dosdate)<<16); } return ZR_OK; } #endif // ----------------------------------------------------------------------
void Assert(TState &state,bool cond, const char *msg) { if (cond) return; state.err=msg; } void Trace(const char *x, ...) {va_list paramList; va_start(paramList, x); paramList; va_end(paramList);} void Tracec(bool ,const char *x, ...) {va_list paramList; va_start(paramList, x); paramList; va_end(paramList);}
// =========================================================================== // Local (static) routines in this file. // void init_block (TState &); void pqdownheap (TState &,ct_data *tree, int k); void gen_bitlen (TState &,tree_desc *desc); void gen_codes (TState &state,ct_data *tree, int max_code); void build_tree (TState &,tree_desc *desc); void scan_tree (TState &,ct_data *tree, int max_code); void send_tree (TState &state,ct_data *tree, int max_code); int build_bl_tree (TState &); void send_all_trees (TState &state,int lcodes, int dcodes, int blcodes); void compress_block (TState &state,ct_data *ltree, ct_data *dtree); void set_file_type (TState &); void send_bits (TState &state, int value, int length); unsigned bi_reverse (unsigned code, int len); void bi_windup (TState &state); void copy_block (TState &state,char *buf, unsigned len, int header); #define send_code(state, c, tree) send_bits(state, tree[c].fc.code, tree[c].dl.len) // Send a code of the given tree. c and tree must not have side effects
// alternatively... //#define send_code(state, c, tree) // { if (state.verbose>1) fprintf(stderr,"\ncd %3d ",(c)); // send_bits(state, tree[c].fc.code, tree[c].dl.len); } #define d_code(dist) ((dist) < 256 ? state.ts.dist_code[dist] : state.ts.dist_code[256+((dist)>>7)]) // Mapping from a distance to a distance code. dist is the distance - 1 and // must not have side effects. dist_code[256] and dist_code[257] are never used. #define Max(a,b) (a >= b ? a : b) /* the arguments must not have side effects */ /* =========================================================================== * Allocate the match buffer, initialize the various tables and save the * location of the internal file attribute (ascii/binary) and method * (DEFLATE/STORE). */ void ct_init(TState &state, ush *attr) { int n; /* iterates over tree elements */ int bits; /* bit counter */ int length; /* length value */ int code; /* code value */ int dist; /* distance index */ state.ts.file_type = attr; //state.ts.file_method = method; state.ts.cmpr_bytelen = state.ts.cmpr_len_bits = 0L; state.ts.input_len = 0L; if (state.ts.static_dtree[0].dl.len != 0) return; /* ct_init already called */ /* Initialize the mapping length (0..255) -> length code (0..28) */ length = 0; for (code = 0; code < LENGTH_CODES-1; code++) { state.ts.base_length[code] = length; for (n = 0; n < (1<<extra_lbits[code]); n++) { state.ts.length_code[length++] = (uch)code; } } Assert(state,length == 256, "ct_init: length != 256"); /* Note that the length 255 (match length 258) can be represented * in two different ways: code 284 + 5 bits or code 285, so we * overwrite length_code[255] to use the best encoding: */ state.ts.length_code[length-1] = (uch)code; /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ dist = 0; for (code = 0 ; code < 16; code++) { state.ts.base_dist[code] = dist; for (n = 0; n < (1<<extra_dbits[code]); n++) { state.ts.dist_code[dist++] = (uch)code; } } Assert(state,dist == 256, "ct_init: dist != 256"); dist >>= 7; /* from now on, all distances are divided by 128 */ for ( ; code < D_CODES; code++) { state.ts.base_dist[code] = dist << 7; for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { state.ts.dist_code[256 + dist++] = (uch)code; } } Assert(state,dist == 256, "ct_init: 256+dist != 512"); /* Construct the codes of the static literal tree */ for (bits = 0; bits <= MAX_BITS; bits++) state.ts.bl_count[bits] = 0; n = 0; while (n <= 143) state.ts.static_ltree[n++].dl.len = 8, state.ts.bl_count[8]++; while (n <= 255) state.ts.static_ltree[n++].dl.len = 9, state.ts.bl_count[9]++; while (n <= 279) state.ts.static_ltree[n++].dl.len = 7, state.ts.bl_count[7]++; while (n <= 287) state.ts.static_ltree[n++].dl.len = 8, state.ts.bl_count[8]++; /* fc.codes 286 and 287 do not exist, but we must include them in the * tree construction to get a canonical Huffman tree (longest code * all ones) */ gen_codes(state,(ct_data *)state.ts.static_ltree, L_CODES+1); /* The static distance tree is trivial: */ for (n = 0; n < D_CODES; n++) { state.ts.static_dtree[n].dl.len = 5; state.ts.static_dtree[n].fc.code = (ush)bi_reverse(n, 5); } /* Initialize the first block of the first file: */ init_block(state); } /* =========================================================================== * Initialize a new block. */ void init_block(TState &state) { int n; /* iterates over tree elements */ /* Initialize the trees. */ for (n = 0; n < L_CODES; n++) state.ts.dyn_ltree[n].fc.freq = 0; for (n = 0; n < D_CODES; n++) state.ts.dyn_dtree[n].fc.freq = 0; for (n = 0; n < BL_CODES; n++) state.ts.bl_tree[n].fc.freq = 0; state.ts.dyn_ltree[END_BLOCK].fc.freq = 1; state.ts.opt_len = state.ts.static_len = 0L; state.ts.last_lit = state.ts.last_dist = state.ts.last_flags = 0; state.ts.flags = 0; state.ts.flag_bit = 1; } #define SMALLEST 1 /* Index within the heap array of least frequent node in the Huffman tree */ /* =========================================================================== * Remove the smallest element from the heap and recreate the heap with * one less element. Updates heap and heap_len. */ #define pqremove(tree, top) \ {\ top = state.ts.heap[SMALLEST]; \ state.ts.heap[SMALLEST] = state.ts.heap[state.ts.heap_len--]; \ pqdownheap(state,tree, SMALLEST); \ }
/* =========================================================================== * Compares to subtrees, using the tree depth as tie breaker when * the subtrees have equal frequency. This minimizes the worst case length. */ #define smaller(tree, n, m) \ (tree[n].fc.freq < tree[m].fc.freq || \ (tree[n].fc.freq == tree[m].fc.freq && state.ts.depth[n] <= state.ts.depth[m])) /* =========================================================================== * Restore the heap property by moving down the tree starting at node k, * exchanging a node with the smallest of its two sons if necessary, stopping * when the heap property is re-established (each father smaller than its * two sons). */ void pqdownheap(TState &state,ct_data *tree, int k) { int v = state.ts.heap[k]; int j = k << 1; /* left son of k */ int htemp; /* required because of bug in SASC compiler */ while (j <= state.ts.heap_len) { /* Set j to the smallest of the two sons: */ if (j < state.ts.heap_len && smaller(tree, state.ts.heap[j+1], state.ts.heap[j])) j++; /* Exit if v is smaller than both sons */ htemp = state.ts.heap[j]; if (smaller(tree, v, htemp)) break; /* Exchange v with the smallest son */ state.ts.heap[k] = htemp; k = j; /* And continue down the tree, setting j to the left son of k */ j <<= 1; } state.ts.heap[k] = v; }< |