1. LZ77 is a dictionary-based compression algorithm. As with Huffman coding, the symbols it processes are not necessarily text characters; they can be symbols of any size.
2. LZ77 is implemented with a forward (look-ahead) buffer, which is a small segment of the data still to be encoded, and a sliding window, which is the search area. The sliding window is a history buffer that holds the previous n bytes of the input stream that have already been processed. The forward buffer holds the next n bytes of the input stream that are about to be encoded. Typical sizes are a 4 KB sliding window and a 32-byte forward buffer.
The main idea of the algorithm is to repeatedly find the longest phrase in the forward buffer that matches a phrase in the dictionary (the sliding window). If the length of the matching data is greater than the minimum match length, the encoder outputs a (length, distance) pair that refers to the corresponding position in the sliding window. The length is the number of matching bytes, and the distance indicates how far back in the input stream the matching data can be found.
The most expensive part of the LZ77 algorithm is scanning the sliding window for matching phrases. A more efficient approach is to replace the linear scan of the sliding window with a data structure that supports fast searching.
LZ77 generally achieves a better compression ratio than Huffman coding, but it takes a considerable amount of time during compression.
If the forward buffer contains A, B, and D, then the phrases in the buffer are {(A), (A,B), (A,B,D)}.
If the sliding window contains A, B, and C, then the phrases in the window (the dictionary) are {(A), (A,B), (A,B,C), (B), (B,C), (C)}.
3. Basic Flow
(1) Starting at the current compression position, examine the data that has not yet been encoded and try to find the longest matching string in the sliding window. If a match is found, proceed to step (2); otherwise, proceed to step (3).
(2) Output the triple (off, len, c), where off is the offset of the matching string within the window, measured from the window boundary, len is the match length, and c is the next character, i.e. the first character that does not match. Then advance the window by len + 1 characters and return to step (1).
(3) Output the triple (0, 0, c), where c is the next character. Then advance the window by one character and return to step (1).
3. Longest string matching
//matches the longest string in the buffer from the window; offset returns the first position in the window; next returns the buffer string and does not match the first character position// Returns the length of the longest string that matches the static int compare_win (const unsigned char *window, const unsigned char *buffer, int *offset, unsigned char
*next) {int match, longest, I, J, K;
*offset = 0;
Longest = 0;
*next = buffer[0];
The outermost loop is the 1th character in WINDOW-nth character, 2nd-nth One ..., n-1 to nth for (k = 0; k < lz77_window_size; k++) {i = k;
j = 0;
Match = 0; In one of the outermost loops find the longest string that the buffer can match (starting with the first symbol in buffer) while (I < lz77_window_size && J < lz77_buffer_size-
1) {if (Window[i]!= buffer[j]) break;
Match statistics at present matched the length of match++;
i++;
j + +;
//Save return information if (Match > Longest) {*offset = k;
Longest = match;
*next = Buffer[j]} return longest; }
4. LZ77 compression
int lz77_compress (const unsigned char *original, unsigned char **compressed, int size) {unsigned char Window[lz77_win
Dow_size], buffer[lz77_buffer_size], *comp, *temp, next;
int offset, length, remaining, Hsize, IPOs, OPOS, TPOs, I;
int token, tbits;
Initialize *compressed = NULL;
memset (window, 0, lz77_window_size);
memset (buffer, 0, lz77_buffer_size);
Writes the source data byte number to the header information hsize = sizeof (int);
comp = (unsigned char *) malloc (hsize);
memcpy (comp, &size, sizeof (int));
IPOs = 0;//ipos to the bytes being processed in the source data from the source data to the buffer for (i = 0; i < lz77_buffer_size && IPOs < SIZE; i++)
{Buffer[i] = Original[ipos];
ipos++;
} opos = Hsize * 8;//opos is the location of the compressed data bit remaining = size; while (Remaining > 0) {//Tag = type + offset (in window) + length + next//next is a mismatched character//t
bit indicates generation tag length if ((Length = compare_win (window, buffer, &offset, &next))!= 0) { can find type 1 token = 0x0000_0001 << (lz77_phrase_bits-1); token = Token |
(offset << lz77_phrase_bits-lz77_type_bits-lz77_winoff_bits); token = Token |
(length << lz77_phrase_bits-lz77_type_bits-lz77_winoff_bits-lz77_buflen_bits); token = Token |
Next
Tbits = lz77_phrase_bits;
else {//not found, the mark is the original symbol token = 0x0000_0000; token = Token |
Next
Tbits = lz77_symbol_bits;
//S data processing is a big-endian mode token = htonl (token); Fill in the data for the compression area for (i = 0; i < tbits i++) {if (opos% 8 = 0) {Temp
= (unsigned char *) realloc (comp, (OPOS/8) + 1);
comp = temp;
Tbits a compressed TPOs = (sizeof (unsigned long) * 8)-tbits + i by length Bit_set (comp, opos, bit_get (unsigned char *) &token,TPOs)); Length++;//length is the matching data byte length//left-shift Update window moves the encoded characters in buffer to window memmove (&window[0), &w
Indow[length], lz77_window_size-length);
Memmove (&window[lz77_window_size-length], &buffer[0], length);
Update the contents of the buffer, do the removal of the encoded characters, and transfer the new characters from the source data memmove (&buffer[0], &buffer[length], lz77_buffer_size-length); for (i = lz77_buffer_size-length; (I < lz77_buffer_size) && (IPOs < SIZE);
i++) {Buffer[i] = Original[ipos];
ipos++;
} remaining = Remaining-length;
} *compressed = comp; Return ((OPOS-1)/8) + 1;}
5. LZ77 decompression
/*
 * lz77_uncompress -- restore data produced by lz77_compress().
 *
 * compressed  compressed stream: an int header with the original byte count,
 *             followed by the token bit stream.
 * original    out: receives a heap buffer holding the restored data.  The
 *             caller owns it and must free() it.  Set to NULL on failure or
 *             when the header's byte count is not positive.
 *
 * Returns the number of bytes restored, or -1 if memory allocation fails or a
 * phrase token refers outside the sliding window / look-ahead buffer.
 *
 * NOTE(review): the lz77_* constants and bit_get()/bit_set() are not defined
 * in the part of the file shown here; bit positions are assumed to count from
 * the most significant bit of the first byte -- confirm against their
 * definitions.
 */
int lz77_uncompress(const unsigned char *compressed, unsigned char **original)
{
    unsigned char window[lz77_window_size];
    unsigned char buffer[lz77_buffer_size];
    unsigned char *orig, *temp, next;
    int offset, length, remaining, hsize, size, ipos, opos, tpos, state, i;

    *original = orig = NULL;

    /* Header: the number of bytes in the original data. */
    hsize = sizeof(int);
    memcpy(&size, compressed, sizeof(int));

    memset(window, 0, lz77_window_size);
    memset(buffer, 0, lz77_buffer_size);

    /* ipos is a bit position in the input; opos is a byte position in the output. */
    ipos = hsize * 8;
    opos = 0;
    remaining = size;

    while (remaining > 0) {
        /* The first bit of every token is its type. */
        state = bit_get(compressed, ipos);
        ipos++;

        if (state == 1) {
            /* Phrase token: window offset, match length, next symbol. */
            memset(&offset, 0, sizeof(int));
            for (i = 0; i < lz77_winoff_bits; i++) {
                tpos = (int)(sizeof(int) * 8) - lz77_winoff_bits + i;
                bit_set((unsigned char *)&offset, tpos,
                        bit_get(compressed, ipos));
                ipos++;
            }

            memset(&length, 0, sizeof(int));
            for (i = 0; i < lz77_buflen_bits; i++) {
                tpos = (int)(sizeof(int) * 8) - lz77_buflen_bits + i;
                bit_set((unsigned char *)&length, tpos,
                        bit_get(compressed, ipos));
                ipos++;
            }

            /* next is a single byte, so its bit positions are relative to 8 bits. */
            next = 0x00;
            for (i = 0; i < lz77_next_bits; i++) {
                tpos = (int)(sizeof(unsigned char) * 8) - lz77_next_bits + i;
                bit_set(&next, tpos, bit_get(compressed, ipos));
                ipos++;
            }

            /* The fields were assembled big-endian; convert to host order. */
            offset = ntohl(offset);
            length = ntohl(length);

            /*
             * Reject tokens that would read past the window or write past the
             * buffer; a stream from lz77_compress() never contains them.
             */
            if (offset < 0 || length < 0 ||
                offset + length > lz77_window_size ||
                length >= lz77_buffer_size) {
                free(orig);
                return -1;
            }

            /* Room for the phrase plus the next symbol (realloc(NULL, n) allocates). */
            temp = realloc(orig, opos + length + 1);
            if (temp == NULL) {
                free(orig);
                return -1;
            }
            orig = temp;

            /* Copy the phrase out of the window. */
            i = 0;
            while (i < length && remaining > 0) {
                orig[opos] = window[offset + i];
                opos++;
                buffer[i] = window[offset + i];
                i++;
                remaining--;
            }

            /* Append the unmatched symbol that followed the phrase. */
            if (remaining > 0) {
                orig[opos] = next;
                opos++;
                buffer[i] = next;
                remaining--;
            }

            /* The window must advance past the phrase and the next symbol. */
            length++;
        } else {
            /* Symbol token: a single literal byte. */
            next = 0x00;
            for (i = 0; i < lz77_next_bits; i++) {
                tpos = (int)(sizeof(unsigned char) * 8) - lz77_next_bits + i;
                bit_set(&next, tpos, bit_get(compressed, ipos));
                ipos++;
            }

            temp = realloc(orig, opos + 1);
            if (temp == NULL) {
                free(orig);
                return -1;
            }
            orig = temp;

            orig[opos] = next;
            opos++;

            if (remaining > 0)
                buffer[0] = next;

            remaining--;
            length = 1;
        }

        /* Slide the window: drop the oldest bytes, append the decoded ones. */
        memmove(&window[0], &window[length], lz77_window_size - length);
        memmove(&window[lz77_window_size - length], &buffer[0], length);
    }

    *original = orig;

    return opos;
}