This is roughly how file compression with Huffman coding works (original article).
Compression process: Read the file and count how often each byte value occurs. In my program the data type is BYTE rather than char, so files other than text files can be compressed as well. Then build the Huffman tree. When saving the compressed file, I first store the total number of bytes of the original file, using 4 bytes. The reason is that after compression the last part of the data may not fill a whole byte but must still be saved as one byte; without the stored length, the padding bits would produce extra output during decompression. Next, I store the static linked list that corresponds to the Huffman tree. For each node I use a tag of 0 or 1 to indicate whether the left and right children refer to other intermediate nodes (tag 0, the value is an index) or to leaves (tag 1, the value is a byte from the file). Then the content of the source file is written to the compressed file according to the Huffman codes. In hindsight my handling of this static linked list was clumsy: I should have packed the tag bits for the left and right children together in one place using bitwise operations, instead of spending a whole BYTE on each tag, which wastes space. Note the special case when building the Huffman tree for a file that contains only one distinct character. I make both the left and right children of the head node point to this character. Another way is to store just the number of characters and the character itself, but then the decoding method has to be different. I chose the first method because I was being lazy.
Decompression process: Decode the data by walking the static linked list according to the Huffman codes. In the encoding, 0 means moving to the left child and 1 means moving to the right child. For example, if the Huffman code of 'a' is 10, I start from the head of the static linked list; on reading "1" I select the right child, and on reading "0" I take that node's left child, which is 'a'. In this way the file can be decompressed.
The core code is as follows:
// Node of the Huffman tree that is built in memory while compressing.
typedef struct st
{
    short Letter;             // byte value for a leaf; -1 for an internal node
    int Frequent;             // occurrence count (sum of both children for an internal node)
    struct st *left, *right;  // children; both NULL for a leaf
} HuffmanNode, *pHuffman;

// One entry of the static linked list, i.e. the array form of the Huffman
// tree that is written to and read back from the compressed file.
typedef struct CodeLList
{
    HuffmanNode Node;
    BYTE left, right;        // child value: an entry index or a literal byte, see the tags
    BYTE lefttag, righttag;  // tag == 0: the child value is an index of another entry;
                             // tag != 0: the child value is a byte of the original file
} CodeLList;

case ID_COMPRESSBUTTON:
{
Used = 0;
HuffmanNode temp;
Char buffer [258]; Node = new pHuffman [256];
FileByteLen = 0; for (I = 0; I <256; I ++)
{
ElementCount [I] = 0;
}
GetWindowText (hEditFile, szFileName, 50 );
If (szFileName [0] = '/0 ')
{
MessageBox (hwnd, "Enter the file name", "Warning! ",
MB_ OK | MB_ICONWARNING );
Return 0;
}
Else
Fp = fopen (szFileName, "rb ");
If (! Fp)
{
MessageBox (hwnd, TEXT ("this file cannot be found"), TEXT ("Warning! "),
MB_ OK | MB_ICONWARNING );
Return 0;
} While (! Feof (fp ))
{
Fread (& character, sizeof (BYTE), 1, fp );
If (feof (fp ))
Break;
ElementCount [character] ++;
FileByteLen ++;
} For (I = 0; I <256; I ++)
{
If (ElementCount [I]> 0)
{
Node [used] = new HuffmanNode;
Node [used]-> Frequent = ElementCount [I];
Node [used]-> Letter = I;
Node [used]-> left = Node [used]-> right = NULL;
Used ++;
}
} // Sort data in ascending order
For (I = 0; I <used-1; I ++)
For (j = I + 1; j <used; j ++)
{
If (Node [I]> Node [j])
{
Temp = * Node [I];
* Node [I] = * Node [j];
* Node [j] = temp;
}
} // Generate the user tree
Begin = 0;
While (begin <used-1)
{
P = new HuffmanNode;
P-> Letter =-1;
P-> Frequent = Node [begin]-> Frequent + Node [begin + 1]-> Frequent;
P-> left = Node [begin];
P-> right = Node [begin + 1]; for (I = begin + 1; I <used-1; I ++)
{
If (Node [I + 1]-> Frequent <p-> Frequent)
Node [I] = Node [I + 1];
Else
Break;
} If (begin + 2> = used)
Node [begin + 1] = p;
Else
Node [I] = p;
Begin ++;
}
If (used> 1)
{
Head = p;
BuildCode (Head, buffer, 0 );
}
Else
{
Code [Node [0]-> Letter] [0] = '0 ';
Code [Node [0]-> Letter] [1] = '/0 ';
}
Len = strlen (szFileName );
SzFileName [len] = '.';
SzFileName [len + 1] ='s ';
SzFileName [len + 2] = 'l ';
SzFileName [len + 3] = '/0'; fpDest = fopen (szFileName, "wb + ");
// Start writing compressed files
Fwrite (& FileByteLen, sizeof (int), 1, fpDest );
// Table store
If (used> 1) // special processing is required for articles with only one character type.
Count = ConvertTreeList (Head );
Else
{
Count = 1;
CodeLinkList [0]. left = Node [0]-> Letter;
CodeLinkList [0]. right = Node [0]-> Letter;
CodeLinkList [0]. righttag = CodeLinkList [0]. lefttag = 1;
}
Fwrite (& count, sizeof (BYTE), 1, fpDest );
For (I = 0; I <count; I ++)
{
Fwrite (& CodeLinkList [I]. left, sizeof (BYTE), 1, fpDest );
Fwrite (& CodeLinkList [I]. lefttag, sizeof (BYTE), 1, fpDest );
Fwrite (& CodeLinkList [I]. right, 1, sizeof (BYTE), fpDest );
Fwrite (& CodeLinkList [I]. righttag, sizeof (BYTE), 1, fpDest );
}
// Encoding
Bitcount = 0;
Codebyte = 0;
Fseek (fp, 0, SEEK_SET );
While (! Feof (fp ))
{
Character = fgetc (fp );
If (character <0)
Break;
Len = strlen (Code [character]);
For (I = 0; I <len; I ++)
{
If (bitcount = 8)
{
Fwrite (& codebyte, sizeof (BYTE), 1, fpDest );
Codebyte = 0;
Bitcount = 0;
}
Codebyte * = 2;
If (Code [character] [I] = '1 ')
Codebyte ++;
Bitcount ++;
}
} If (bitcount <8)
{
While (bitcount <8)
{
Codebyte * = 2;
Bitcount ++;
} Fwrite (& codebyte, sizeof (BYTE), 1, fpDest );
}
Fclose (fp );
Fclose (fpDest );
Return 0;
} Break; case ID_EXPANDBUTTON:
{
BYTE currentindex;
Bool ByteContent [8]; GetWindowText (hEditFile, szFileName, 50 );
Len = strlen (szFileName );
If (szFileName [0] = '/0 ')
{
MessageBox (hwnd, "Enter the file name", "Warning! ",
MB_ OK | MB_ICONWARNING );
Return 0;
}
Else if (szFileName [len-1]! = 'L' |
SzFileName [len-2]! ='S '|
SzFileName [len-3]! = '.' | Len = 3)
{
MessageBox (hwnd, "incorrect file type", "Warning! ",
MB_ OK | MB_ICONWARNING );
Return 0;
}
Else
Fp = fopen (szFileName, "rb ");
If (! Fp)
{
MessageBox (hwnd, TEXT ("this file cannot be found"), TEXT ("Warning! "),
MB_ OK | MB_ICONWARNING );
Return 0;
} Fread (& FileByteLen, sizeof (int), 1, fp );
Fread (& count, sizeof (BYTE), 1, fp );
For (I = 0; I <count; I ++)
{
Fread (& CodeLinkList [I]. left, sizeof (BYTE), 1, fp );
Fread (& CodeLinkList [I]. lefttag, sizeof (BYTE), 1, fp );
Fread (& CodeLinkList [I]. right, sizeof (BYTE), 1, fp );
Fread (& CodeLinkList [I]. righttag, sizeof (BYTE), 1, fp );
}
// Obtain the extracted file name.
SzFileName [len-3] = '/0 ';
FpDest = fopen (szFileName, "wb + ");
Currentindex = 0;
While (! Feof (fp) & FileByteLen)
{
Fread (& codebyte, 1, 1, fp );
For (I = 0; I <8; I ++)
{
ByteContent [7-I] = codebyte & 1;
Codebyte/= 2;
}
For (I = 0; I <8; I ++)
{
If (! ByteContent [I]) // left subtree
{
If (CodeLinkList [currentindex]. lefttag)
{
// Fwrite (& CodeLinkList [currentindex]. left, sizeof (BYTE), 1, fpDest );
Fputc (CodeLinkList [currentindex]. left, fpDest );
FileByteLen --;
Currentindex = 0;
}
Else
Currentindex = CodeLinkList [currentindex]. left;
}
Else
{
If (CodeLinkList [currentindex]. righttag)
{
// Fwrite (& CodeLinkList [currentindex]. right, sizeof (BYTE), 1, fpDest );
Fputc (CodeLinkList [currentindex]. right, fpDest );
FileByteLen --;
Currentindex = 0;
}
Else
Currentindex = CodeLinkList [currentindex]. right;
} If (! FileByteLen)
Break;
}
}
Fclose (fp );
Fclose (fpDest );
Return 0;
} Break;
}