File compression using Huffman encoding

Source: Internet
Author: User
This (original) article gives an overview of how files are compressed using Huffman encoding.
Compression process: Read the file and count how often each byte value occurs. In my program the data type is BYTE rather than char, so files other than text files can be compressed as well. Then build the Huffman tree. When saving the compressed file, I first store the total number of bytes of the original file, using 4 bytes. The reason is that the last part of the compressed data may not fill a whole byte but still has to be saved as a whole byte, so without the length the padding bits would produce extra output during decompression. Next, I store the static linked list that corresponds to the Huffman tree. I use tags of 1 and 0 to indicate whether the left and right children are leaves (byte values from the file) or point to other intermediate nodes (in the code, a tag of 0 means the field is an index to another node). Then the content of the source file is written to the compressed file according to the Huffman encoding. My handling of this static linked list was clumsy: I should have packed the tag bits for the left and right children together in one place using bitwise operations, instead of spending a whole BYTE on each tag, which wastes space. Note the special case in which the file contains only one distinct character when the Huffman tree is created. I point both the left and right children of the head node to this character. Another way is to save the character together with its count, but then the decoding method is different. I used the first method because I was being lazy.
Decompression process: Read the static linked list back from the file and decode the data by walking it according to the Huffman code. In the encoding, 0 means move to the left child and 1 means move to the right child. For example, if the Huffman code of 'a' is 10, I start from the head of the static linked list; the first bit is "1", so I take the right child, and the next bit is "0", so I take its left child, which is 'a'. In this way the file can be decompressed.
The core code is as follows:
/* Node of the Huffman tree built while compressing. */
typedef struct st
{
    short Letter;             /* byte value of a leaf; the compression code stores -1 in internal nodes */
    int Frequent;             /* occurrence count; for internal nodes the sum of both children */
    struct st *left, *right;  /* children; the compression code sets both to NULL for leaves */
} HuffmanNode, *pHuffman;

/*
 * Entry of the static linked list (array form of the Huffman tree).
 * The left/lefttag/right/righttag fields are what gets written to and
 * read back from the compressed file.
 */
typedef struct CodeLList
{
    HuffmanNode Node;
    BYTE left, right;         /* per child: index of another entry, or a byte value (see tags) */
    BYTE lefttag, righttag;   // if tag = 0, means that it is a index.
} CodeLList;

case ID_COMPRESSBUTTON:
{
Used = 0;
HuffmanNode temp;
Char buffer [258]; Node = new pHuffman [256];
FileByteLen = 0; for (I = 0; I <256; I ++)
{
ElementCount [I] = 0;
}

GetWindowText (hEditFile, szFileName, 50 );
If (szFileName [0] = '/0 ')
{
MessageBox (hwnd, "Enter the file name", "Warning! ",
MB_ OK | MB_ICONWARNING );
Return 0;
}
Else
Fp = fopen (szFileName, "rb ");
If (! Fp)
{
MessageBox (hwnd, TEXT ("this file cannot be found"), TEXT ("Warning! "),
MB_ OK | MB_ICONWARNING );
Return 0;
} While (! Feof (fp ))
{
Fread (& character, sizeof (BYTE), 1, fp );
If (feof (fp ))
Break;
ElementCount [character] ++;
FileByteLen ++;
} For (I = 0; I <256; I ++)
{
If (ElementCount [I]> 0)
{
Node [used] = new HuffmanNode;
Node [used]-> Frequent = ElementCount [I];
Node [used]-> Letter = I;
Node [used]-> left = Node [used]-> right = NULL;
Used ++;
}
} // Sort data in ascending order
For (I = 0; I <used-1; I ++)
For (j = I + 1; j <used; j ++)
{
If (Node [I]> Node [j])
{
Temp = * Node [I];
* Node [I] = * Node [j];
* Node [j] = temp;
}
} // Generate the user tree
Begin = 0;
While (begin <used-1)
{
P = new HuffmanNode;
P-> Letter =-1;
P-> Frequent = Node [begin]-> Frequent + Node [begin + 1]-> Frequent;
P-> left = Node [begin];
P-> right = Node [begin + 1]; for (I = begin + 1; I <used-1; I ++)
{
If (Node [I + 1]-> Frequent <p-> Frequent)
Node [I] = Node [I + 1];
Else
Break;
} If (begin + 2> = used)
Node [begin + 1] = p;
Else
Node [I] = p;

Begin ++;
}

If (used> 1)
{
Head = p;

BuildCode (Head, buffer, 0 );
}
Else
{
Code [Node [0]-> Letter] [0] = '0 ';
Code [Node [0]-> Letter] [1] = '/0 ';
}

Len = strlen (szFileName );
SzFileName [len] = '.';
SzFileName [len + 1] ='s ';
SzFileName [len + 2] = 'l ';
SzFileName [len + 3] = '/0'; fpDest = fopen (szFileName, "wb + ");

// Start writing compressed files
Fwrite (& FileByteLen, sizeof (int), 1, fpDest );
// Table store
If (used> 1) // special processing is required for articles with only one character type.
Count = ConvertTreeList (Head );
Else
{
Count = 1;
CodeLinkList [0]. left = Node [0]-> Letter;
CodeLinkList [0]. right = Node [0]-> Letter;
CodeLinkList [0]. righttag = CodeLinkList [0]. lefttag = 1;
}
Fwrite (& count, sizeof (BYTE), 1, fpDest );
For (I = 0; I <count; I ++)
{
Fwrite (& CodeLinkList [I]. left, sizeof (BYTE), 1, fpDest );
Fwrite (& CodeLinkList [I]. lefttag, sizeof (BYTE), 1, fpDest );
Fwrite (& CodeLinkList [I]. right, 1, sizeof (BYTE), fpDest );
Fwrite (& CodeLinkList [I]. righttag, sizeof (BYTE), 1, fpDest );
}

// Encoding
Bitcount = 0;
Codebyte = 0;
Fseek (fp, 0, SEEK_SET );
While (! Feof (fp ))
{
Character = fgetc (fp );
If (character <0)
Break;
Len = strlen (Code [character]);
For (I = 0; I <len; I ++)
{
If (bitcount = 8)
{
Fwrite (& codebyte, sizeof (BYTE), 1, fpDest );
Codebyte = 0;
Bitcount = 0;
}
Codebyte * = 2;
If (Code [character] [I] = '1 ')
Codebyte ++;
Bitcount ++;
}
} If (bitcount <8)
{
While (bitcount <8)
{
Codebyte * = 2;
Bitcount ++;
} Fwrite (& codebyte, sizeof (BYTE), 1, fpDest );
}

Fclose (fp );
Fclose (fpDest );
Return 0;
} Break; case ID_EXPANDBUTTON:
{
BYTE currentindex;
Bool ByteContent [8]; GetWindowText (hEditFile, szFileName, 50 );
Len = strlen (szFileName );
If (szFileName [0] = '/0 ')
{
MessageBox (hwnd, "Enter the file name", "Warning! ",
MB_ OK | MB_ICONWARNING );
Return 0;
}
Else if (szFileName [len-1]! = 'L' |
SzFileName [len-2]! ='S '|
SzFileName [len-3]! = '.' | Len = 3)
{
MessageBox (hwnd, "incorrect file type", "Warning! ",
MB_ OK | MB_ICONWARNING );
Return 0;
}
Else
Fp = fopen (szFileName, "rb ");
If (! Fp)
{
MessageBox (hwnd, TEXT ("this file cannot be found"), TEXT ("Warning! "),
MB_ OK | MB_ICONWARNING );
Return 0;
} Fread (& FileByteLen, sizeof (int), 1, fp );
Fread (& count, sizeof (BYTE), 1, fp );
For (I = 0; I <count; I ++)
{
Fread (& CodeLinkList [I]. left, sizeof (BYTE), 1, fp );
Fread (& CodeLinkList [I]. lefttag, sizeof (BYTE), 1, fp );
Fread (& CodeLinkList [I]. right, sizeof (BYTE), 1, fp );
Fread (& CodeLinkList [I]. righttag, sizeof (BYTE), 1, fp );
}

// Obtain the extracted file name.
SzFileName [len-3] = '/0 ';
FpDest = fopen (szFileName, "wb + ");

Currentindex = 0;

While (! Feof (fp) & FileByteLen)
{
Fread (& codebyte, 1, 1, fp );
For (I = 0; I <8; I ++)
{
ByteContent [7-I] = codebyte & 1;
Codebyte/= 2;
}

For (I = 0; I <8; I ++)
{
If (! ByteContent [I]) // left subtree
{
If (CodeLinkList [currentindex]. lefttag)
{
// Fwrite (& CodeLinkList [currentindex]. left, sizeof (BYTE), 1, fpDest );
Fputc (CodeLinkList [currentindex]. left, fpDest );
FileByteLen --;
Currentindex = 0;
}
Else
Currentindex = CodeLinkList [currentindex]. left;
}
Else
{
If (CodeLinkList [currentindex]. righttag)
{
// Fwrite (& CodeLinkList [currentindex]. right, sizeof (BYTE), 1, fpDest );
Fputc (CodeLinkList [currentindex]. right, fpDest );

FileByteLen --;
Currentindex = 0;
}
Else
Currentindex = CodeLinkList [currentindex]. right;
} If (! FileByteLen)
Break;
}
}

Fclose (fp );
Fclose (fpDest );
Return 0;
} Break;
}

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.