Using Huffman tree for file compression

Source: Internet
Author: User
Tags assert int size uncompress

Project Description:


Project Introduction: Use Huffman coding to compress a file, and to decompress the compressed file back to the original

Development environment: Windows, Visual Studio 2013

Project Overview:

1. Compression

A. Read the file and count how many times each character appears; these counts are the weights used to build the Huffman tree

B. The Huffman tree is built with a min-heap ("small heap") of node pointers: nodes with fewer occurrences sit near the top of the heap, nodes with more occurrences near the bottom

C. Repeatedly take the two smallest nodes from the top of the heap and push a parent node whose weight is their sum back into the heap; when only one node remains in the heap, the Huffman tree is complete

D. Derive the Huffman code of every character from the Huffman tree and store it in the character-information array

E. Write the encoded bits to the compressed file: shift the output byte left by one for every code bit, OR in 1 when the code bit is '1', and write the byte out each time 8 bits have been collected

F. Write the configuration file: record each character and its number of occurrences, one entry per line, in the form "character,count"

2. Decompression

A. Read the configuration file to recover the count of each character and the total number of characters

B. Rebuild the Huffman tree, then read the compressed file bit by bit while walking the tree; each time a leaf is reached, write that leaf's character to the decompressed file, and continue until all characters have been written

C. After compression and decompression are complete, test with both small and large files


The code is as follows:

#pragma once #include "HuffManTree.h" #include <string> struct Charinfo {charinfo (int count=0): _count (count) {
	} bool operator< (const Charinfo info) {return _count < Info._count;
	} bool Operator> (const Charinfo info) {return _count>info._count;
	} bool Operator!= (const Charinfo info) {return _count! = Info._count;
	} charinfo operator+ (const charinfo Info) {return charinfo (_count + info._count);

} char _ch;//character int _count;//The number of occurrences of string _code;//character corresponding to the encoding};
			Class Filecompress {public:filecompress () {for (int i = 0; i <; i++) {_info[i]._ch = i;
		_info[i]._count = 0;
		}} public:void Compress (const char* FileName)//Compression {file* Fout = fopen (FileName, "RB");
		
		ASSERT (Fout);
		Number of occurrences of statistical characters int ch = fgetc (fout);
		printf ("%c\n", ch);
		int count = 0;
			while (ch!= EOF) {_info[unsigned char (ch)]._count++;
			ch = fgetc (fout);
		count++;
		}//Build Huffman tree Charinfo Invalid; Huffmantree<charinfo> H (_info, invalid);
		Generate Huffman code string code;

		_gethuffmancode (H._getroot (), code);
		string compressfilename = FileName;
		Compressfilename + = ". Compress";
		file* fin = fopen (Compressfilename.c_str (), "WB");
		ASSERT (Fin);

		Fseek (fout, 0, Seek_set);//From the beginning of the file ch = (unsigned char) fgetc (fout);
		char value = 0;
		int size = 0;
			while (ch! = EOF) {string _ccode = _info[(unsigned char) ch]._code;
				for (int i = 0; i < _ccode.size (); ++i) {value <<= 1;
				if (_ccode[i] = = ' 1 ') {value |=1;
				} size++;
					if (size = = 8) {FPUTC (value, fin);
					Value = 0;
				size = 0;
		}} ch = fgetc (fout);
			}//Complement if (size!=0) {value <<= (8-size);
		FPUTC (value, fin);
		}//write config file string configfilename = FileName;
		Configfilename + = ". Config.txt";
		file* finconfig = fopen (Configfilename.c_str (), "WB");
	
		ASSERT (Finconfig);
		String str;
		Char buf[128]; for (int i = 0; i < i++) {if (_info[i]._count>0) {str + = _info[i]._ch;
				str + = ', ';
				_itoa (_info[i]._count, buf, 10);
				str + = BUF;

				str + = ' \ n ';
				Fputs (Str.c_str (), finconfig);
			Str.clear ();
		}} fclose (Fin);
		Fclose (Fout);
	Fclose (Finconfig);
		} void Uncompress (const char* FileName)//Unzip {//Read config file string configfilenane = FileName;
		Configfilenane + = ". Config.txt";
		file* foutconfig = fopen (Configfilenane.c_str (), "RB");
		ASSERT (Foutconfig);
		int count = 0;
		String str;
				while (Read_a_line (FOUTCONFIG,STR)) {if (Str.empty ()) {str + = ' \ n ';
				Count + = 1;
			Str.clear ();
			}//else//{////_info[(unsigned char) str[0]] = Atoi (Str.substr (2). C_STR ());
			Count + = _info[(unsigned char) str[0]]._count;
			Str.clear ();
			}//_info[((unsigned char) str[0])]._count = Atoi (Str.substr (2). C_STR ());

			Count + = _info[(unsigned char) str[0]]._count;
				else {unsigned char ch = str[0]; _info[ch]._count = Atoi (Str.substr (2). C_str ());
				Count + = _info[ch]._count;
			Str.clear ();
		}} charinfo Invaild;

		Huffmantree<charinfo> Tree (_info, 256,invaild);
		string uncompressfilename = FileName;
		Uncompressfilename + = ". Uncompress";//Unzip the file string compressfilename = FileName;

		Compressfilename + = ". Compress";
		file* fout = fopen (Compressfilename.c_str (), "RB");
		ASSERT (Fout);
		file* fin = fopen (Uncompressfilename.c_str (), "WB");

		ASSERT (Fin);
		huffmantreenode<charinfo>* root = Tree._getroot ();
		huffmantreenode<charinfo>* cur = root;
		int ch = fgetc (fout);

		int size = 7;
			while (ch = EOF) {if (Ch & (1 << size)) {cur = cur->_right;
			} else {cur = cur->_left;
				} if (Cur->_left==null&&cur->_right==null) {FPUTC (cur->_weight._ch, Fin);
				cur = root;
				count--;
			if (count = = 0)//break;
			} size--;
				if (size<0) {ch=fgetc (fout);
			size = 7;
}} fclose (Fin);		Fclose (Fout);
	Fclose (Foutconfig);
		} protected:bool read_a_line (file*& fout,string& str) {int ch = fgetc (fout);
		
		if (ch = = EOF) return false;
			while (ch! = eof&&ch!= ' \ n ') {str + = ch;
		ch = fgetc (fout);
	} return true; } void _gethuffmancode (const huffmantreenode<charinfo>* root,string code)//Generate Huffman encoding {if (root = = NULL) {R
		Eturn;  } if (Root->_left = = Null&&root->_right = = NULL) {_info[unsigned char ((root->_weight). _ch)]._code
			= code;
		Return } if (Root->_left) _gethuffmancode (root->_left, code + ' 0 ');//Left 0 if (root->_right) _gethuffmancode (RO
Ot->_right, code + ' 1 ');//on the 1} private:charinfo _info[256]; };


#pragma once #include <iostream> #include "Heap.h" using namespace std; Template<class t> struct Huffmantreenode {huffmantreenode (const t& weight): _left (null), _right (null), _wei
	Ght (weight) {} huffmantreenode<t>* _left;
	huffmantreenode<t>* _right;
T _weight;


}; Template<class t> class Huffmantree {public:typedef huffmantreenode<t> Node; Public:huffmantree (T* arr, int size, t& invalid)//build a small heap {struct Comparenode {bool operator () (node*& l,node*& R) {retur
			N L->_weight < r->_weight;
		}
		};

		heap<node*, comparenode> minheap;
		for (int i = 0; i < size; i++) {if (arr[i]!=invalid) Minheap.push (New Node (Arr[i]));
			} while (Minheap.size () >1) {node* left = Minheap.top ();
			Minheap.pop ();
			node* right = Minheap.top ();
			Minheap.pop ();
			node* parent = new Node (left->_weight + right->_weight);
			Parent->_left = left; Parent->_right = Right;
		Minheap.push (parent);
		} _root = Minheap.top ();
	Minheap.pop ();
	} node* _getroot () {return _root;
} private:node* _root;			


};
#pragma once
#include <iostream>
#include <utility>
#include <vector>
using namespace std;
#include <assert.h>

// Default ordering functor: with it Heap behaves as a min-heap ("small heap").
template<class T>
struct Small
{
public:
	bool operator()(const T& l, const T& r) const
	{
		return l < r;
	}
};

// Can be used to build a max-heap ("large heap") instead:
//template<class T>
//struct Big
//{
//	bool operator()(const T& l, const T& r) const
//	{
//		return l > r;
//	}
//};

// Binary heap stored in a vector. CompareNode(a, b) returns true when a must
// be closer to the top than b; the default Small<T> yields a min-heap.
template<class T, class CompareNode = Small<T> >
class Heap
{
public:
	Heap()
	{}

	// Builds a heap from the first `size` elements of arr.
	Heap(const T* arr, int size)
	{
		for (int i = 0; i < size; i++)
		{
			_v.push_back(arr[i]);
		}

		// Sift down every non-leaf node, starting from the last one.
		// Size() is a signed int, so an empty heap gives -1 and the loop is skipped.
		for (int i = Size() / 2 - 1; i >= 0; i--)
		{
			_AdjustDown(i);
		}
	}

	~Heap()
	{}

	// Inserts d and restores the heap order.
	void Push(const T& d)
	{
		_v.push_back(d);
		_AdjustUp(Size() - 1);
	}

	// Number of elements currently in the heap.
	int Size() const
	{
		return static_cast<int>(_v.size());
	}

	// Returns the top element. The heap must not be empty.
	T& Top()
	{
		assert(!_v.empty());
		return _v.front();
	}

	// Removes the top element. The heap must not be empty.
	void Pop()
	{
		assert(!_v.empty());
		// Exchange method: move the top to the back, drop it, then sift the
		// element that replaced it down to its place.
		swap(_v[0], _v[_v.size() - 1]);
		_v.pop_back();
		_AdjustDown(0);
	}

protected:
	// Downward adjustment: sinks the element at index parent until neither
	// child has to be above it.
	void _AdjustDown(int parent)
	{
		CompareNode compareNode;
		const int count = Size();

		int child = 2 * parent + 1;
		while (child < count)
		{
			// Pick whichever child belongs closer to the top.
			if (child + 1 < count && compareNode(_v[child + 1], _v[child]))
			{
				child++;
			}

			if (compareNode(_v[child], _v[parent]))
			{
				swap(_v[parent], _v[child]);
				parent = child;
				child = 2 * parent + 1;
			}
			else
			{
				break;
			}
		}
	}

	// Upward adjustment: lifts the element at index child while it has to be
	// above its parent.
	void _AdjustUp(int child)
	{
		CompareNode compareNode;

		int parent = (child - 1) / 2;
		while (child > 0)
		{
			if (compareNode(_v[child], _v[parent]))
			{
				swap(_v[parent], _v[child]);
				child = parent;
				parent = (child - 1) / 2;
			}
			else
			{
				break;
			}
		}
	}

private:
	vector<T> _v;
};
	#include "FileCompress.h"
#include <stdlib.h>

// Round trip: compresses Input.txt, then restores it next to the original.
// Both calls must receive exactly the same file name, because the side files
// (.compress, .config.txt, .uncompress) are derived from it.
void Test()
{
	FileCompress f;
	f.Compress("Input.txt");
	f.Uncompress("Input.txt");
}

int main()
{
	Test();
	system("pause");  // Windows only: keeps the console window open
	return 0;
}



Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.