Huffman coding is a coding method; specifically, it is a form of variable-length coding (VLC).
Huffman coding uses a variable-length code table to encode source symbols (such as the letters in a file). The code table is built by estimating the probability of occurrence of each source symbol: symbols with a high probability get shorter codes, while symbols with a low probability get longer codes. This reduces the average (expected) length of the encoded string and thereby achieves lossless data compression.
The specific steps of Huffman coding are as follows:
1. Arrange the source symbols in a queue in order of decreasing probability.
2. Add the two smallest probabilities together and repeat this step, always placing the higher-probability branch on the right, until the final sum becomes probability 1.
3. Trace the path from probability 1 to each source symbol, writing down the 0s and 1s along the path in order; the result is the Huffman code word of that symbol.
4. The left branch of each combined pair is assigned 0 and the right branch is assigned 1 (or vice versa).
Example: consider a 30-symbol string made up of 5 different symbols:
BABACAC ADADABB CBABEBE DDABEEEBB
1. First count the number of occurrences (probability) of each character: A = 8, B = 10, C = 3, D = 4, E = 5.
2. Repeatedly add the two smallest occurrence counts (probabilities) and make them the left and right subtrees of a new node, until the probability value reaches 1 (that is, the count reaches 30).
First: add the two lowest values, 3 and 4, and combine them into 7:
Second: add the two lowest values, 5 and 7, and combine them into 12:
Third: add 8 and 10 and combine them into 18:
Fourth: add the two lowest values, 12 and 18, to get 30 (probability 1), which ends the combining:
3. The left branch of each binary subtree is assigned 0 and the right branch is assigned 1. 4. Follow the path from the top of the binary tree down to each character to obtain the encoding of each symbol.
We can see that the more often a symbol occurs (the higher its probability), the higher it sits in the tree and the shorter its code; the less often it occurs, the lower it sits and the longer its code. Encoding and decoding are both done bit by bit: for example, given the bit string "10111101100", decoding yields "ABBDE" (with the tree built above, A = 10, B = 11, C = 010, D = 011, E = 00). We therefore need to build our Huffman encoding and decoding dictionary tables from this binary tree.
It should be noted that Huffman coding guarantees that no character's code is a prefix of another character's code. A pair such as 'A': 00 and 'B': 001 can never occur, so decoding is never ambiguous.
Limitations of Huffman coding
With Huffman coding, the code length of each symbol can only be a whole number of bits. Therefore, if the probabilities of the source symbols are not all of the form $2^{-n}$, the entropy limit cannot be reached. In addition, the number of input symbols is limited by the size of the code table that can be realized; decoding is complex; the probability distribution of the input symbol set must be known in advance; and there is no error protection.
Huffman coding implementation (C++):
int main ()
{
int n, W;
char c;
string S;
cout << "Input size of char:";
CIN >> N;
Binartnodes Bn;
for (int i = 0; I!= N; ++i)
{
cout << "input char and Weight:";
CIN >> c >> W;
Bn.add_node (Node (c, W));
Cin.clear ();
}
while (Bn.size ()!= 1)
{
node N1 = Bn.pop (), //Get the first two weights the smallest node
n2 = Bn.pop ();
Node h (', n1.get_weight () + n2.get_weight ()); New node, weighted to the top two node weights and
if (N1.get_weight () < N2.get_weight ()) //weights smaller nodes on the left of the new node
{
H.set (n1, N2); Set the new node left and right child node
}
else
{
H.set (N2, N1);
}
Bn.add_node (h); Inserts a new node into the Multiset
}
encodeing (Bn.get_node (), s); Code
cout << "Input Huffman Code:";
Cin >> S;
cout << "decoded chars:";
Decoding (Bn.get_node (), s); Decoding
}
Handle.h — the Handle class:
/* Handle.h */
#ifndef HANDLE_H
#define HANDLE_H

#include <cstddef>
#include <stdexcept>

// Handle model class: a reference-counted pointer wrapper.
//
// Every copy of a Handle shares the same pointee and the same use counter.
// The pointee and the counter are deleted when the last Handle that refers to
// them is destroyed or reassigned. Dereferencing a Handle that holds a null
// pointer throws std::runtime_error.
template <class Type>
class Handle {
public:
    // Takes ownership of ptr (which may be null) with a use count of 1.
    Handle(Type *ptr = 0) : pn(ptr), use(new std::size_t(1)) {}

    Type& operator*();                 // overloaded operator*
    Type* operator->();                // overloaded operator->
    const Type& operator*() const;
    const Type* operator->() const;

    // Copy: share the pointee and bump the shared counter.
    Handle(const Handle &h) : pn(h.pn), use(h.use) { ++*use; }

    Handle& operator=(const Handle &h);   // overloaded assignment

    ~Handle() { rem_ref(); }

private:
    Type *pn;             // object pointer
    std::size_t *use;     // shared use count

    // Drops one reference; frees the pointee and the counter on the last one.
    void rem_ref()
    {
        if (--*use == 0) {
            delete pn;
            delete use;
        }
    }
};

template <class Type>
inline Type& Handle<Type>::operator*()
{
    if (pn) return *pn;
    throw std::runtime_error("dereference of Unbound Handle");
}

template <class Type>
inline const Type& Handle<Type>::operator*() const
{
    if (pn) return *pn;
    throw std::runtime_error("dereference of Unbound Handle");
}

template <class Type>
inline Type* Handle<Type>::operator->()
{
    if (pn) return pn;
    throw std::runtime_error("Access through unbound handle");
}

template <class Type>
inline const Type* Handle<Type>::operator->() const
{
    if (pn) return pn;
    throw std::runtime_error("Access through unbound handle");
}

template <class Type>
inline Handle<Type>& Handle<Type>::operator=(const Handle &rhs)
{
    // Increment the right-hand counter first so that self-assignment does not
    // drop the count to zero and free the object that is being assigned.
    ++*rhs.use;
    rem_ref();
    pn = rhs.pn;
    use = rhs.use;
    return *this;
}

#endif /* HANDLE_H */
Node.h — the Node class:
/*node.h*/template <class t> class Handle; Class node{friend class handle<node>; Handle Model Class Public:node (): Ch ("), Wei (0), bits (), LC (), RC () {} Node (const char C, const int W): Ch (c), Wei (w)
, bits (), LC (), RC () {} node (const node &n) {ch = n.ch; wei = n.wei; bits = n.bits; LC = N.LC; rc = n.rc;
Virtual node* Clone () const {return new Node (*this);} int Get_weight () const {return Wei} Get weight char Get_char () const {return ch;} Get character Node &get_lchild () {return *LC;} Get the Left Node node &get_rchild () {return *RC}
Get the right node void set (const node &l, const node &r) {//Set the left-node LC = handle<node> (new Node (L));
rc = handle<node> (new Node (R));} void Set_bits (const string &s) {bits = s;} Set encoding Private:char ch; character int Wei; Weight string bits; Encoding handle<node> LC; Left node handle handle<node> RC;
Right node handle}; InlINE bool Compare (const node &LHS, const node &RHS); Multiset comparison function inline bool Compare (const node &LHS, const node &RHS) {return lhs.get_weight () < rhs.ge
T_weight (); Class binartnodes{typedef bool (*COMP) (const node&, const node&); Public:binartnodes (): MS (compare) {} Initialize MS's comparison function void Add_node (Node &n) {Ms.insert (n);} Increase node node pop (); Out node size_t size () {return ms.size ();} Gets the multiset size Node Get_node () {return *ms.begin ();}
Get multiset First Data private:multiset<node, comp> MS;
}; /*node.cpp*/#include "Node.h" node Binartnodes::p op () {Node n = *ms.begin (); Gets multiset First Data ms.erase (Ms.find (*ms.begin ()));
Remove the data from the Multiset return n; }
Huffman coding implementation (C):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXBIT   100
#define MAXVALUE 10000   /* kept for compatibility; no longer used as a sentinel */
/* NOTE(review): the value of MAXLEAF is missing from the original listing;
 * 30 is chosen here -- adjust as needed (it must not exceed MAXBIT). */
#define MAXLEAF  30
#define MAXNODE  (MAXLEAF * 2 - 1)

/* Code structure: bits are stored in bit[start+1 .. n-1]. */
typedef struct {
    int bit[MAXBIT];
    int start;
} HCodeType;

/* Node structure: parent/lchild/rchild are array indices, -1 meaning "none". */
typedef struct {
    int weight;
    int parent;
    int lchild;
    int rchild;
    char value;
} HNodeType;

/*
 * Builds a Huffman tree in HuffNode[0 .. 2*n-2].
 *
 * Reads n leaf characters and then n leaf weights from standard input into
 * HuffNode[0 .. n-1], then creates the n-1 internal nodes in
 * HuffNode[n .. 2*n-2]; the root ends up at index 2*n-2.
 *
 * Does nothing when n is outside 1..MAXLEAF. If a read fails, an error is
 * printed and the function returns with the nodes initialised but unlinked.
 */
void HuffmanTree(HNodeType HuffNode[MAXNODE], int n)
{
    int i, j;

    if (n < 1 || n > MAXLEAF)
        return;

    /* Initialise every node that will be used. */
    for (i = 0; i < 2 * n - 1; i++) {
        HuffNode[i].weight = 0;
        HuffNode[i].parent = -1;
        HuffNode[i].lchild = -1;
        HuffNode[i].rchild = -1;
        HuffNode[i].value = ' ';
    }

    /* Read the character of each leaf. The leading space in " %c" skips the
     * newline left in the buffer by the previous read, so a whitespace
     * character cannot be entered as a symbol. */
    for (i = 0; i < n; i++) {
        printf("Please input char of leaf node: ");
        if (scanf(" %c", &HuffNode[i].value) != 1) {
            fprintf(stderr, "failed to read leaf character\n");
            return;
        }
    }

    /* Read the weight of each leaf. */
    for (i = 0; i < n; i++) {
        printf("Please input weight of leaf node: ");
        if (scanf("%d", &HuffNode[i].weight) != 1) {
            fprintf(stderr, "failed to read leaf weight\n");
            return;
        }
    }

    /* Build the tree: n-1 merges of the two lightest parentless nodes. */
    for (i = 0; i < n - 1; i++) {
        int x1 = -1;   /* index of the lightest parentless node */
        int x2 = -1;   /* index of the second lightest one */

        /* Tracking indices instead of comparing against a fixed maximum
         * means weights of any size are handled. */
        for (j = 0; j < n + i; j++) {
            if (HuffNode[j].parent != -1)
                continue;
            if (x1 == -1 || HuffNode[j].weight < HuffNode[x1].weight) {
                x2 = x1;
                x1 = j;
            } else if (x2 == -1 || HuffNode[j].weight < HuffNode[x2].weight) {
                x2 = j;
            }
        }

        /* Link x1 and x2 under the new parent node n+i. */
        HuffNode[x1].parent = n + i;
        HuffNode[x2].parent = n + i;
        HuffNode[n + i].weight = HuffNode[x1].weight + HuffNode[x2].weight;
        HuffNode[n + i].lchild = x1;
        HuffNode[n + i].rchild = x2;

        /* For testing. */
        printf("x1.weight and x2.weight in round %d: %d, %d\n",
               i + 1, HuffNode[x1].weight, HuffNode[x2].weight);
        printf("\n");
    }
}

/*
 * Decodes the bit string `string` with the tree in Buf and prints the decoded
 * characters.
 *
 * Num is the number of leaves; the root is taken to be Buf[2*Num-2]. A '0'
 * selects the left child; any other character selects the right child.
 * Decoding stops if the input ends in the middle of a code word. When the
 * root itself is a leaf there are no code bits to follow, so nothing is
 * printed.
 */
void decodeing(char string[], HNodeType Buf[], int Num)
{
    size_t len = strlen(string);
    size_t pos = 0;
    int root;

    if (Num < 1)
        return;
    root = 2 * Num - 2;
    if (Buf[root].lchild == -1 || Buf[root].rchild == -1)
        return;

    while (pos < len) {
        int tmp = root;
        while (Buf[tmp].lchild != -1 && Buf[tmp].rchild != -1) {
            if (pos >= len)
                return;           /* truncated code word */
            tmp = (string[pos] == '0') ? Buf[tmp].lchild : Buf[tmp].rchild;
            pos++;
        }
        printf("%c", Buf[tmp].value);
    }
}

/*
 * Reads the leaf count, builds the tree, prints the Huffman code of every
 * leaf, then reads a bit string and prints its decoding.
 */
int main(void)
{
    HNodeType HuffNode[MAXNODE];       /* node array */
    HCodeType HuffCode[MAXLEAF], cd;   /* code array, plus a scratch code */
    int i, j, c, p, n;
    char pp[100];

    printf("Please input n:\n");
    if (scanf("%d", &n) != 1 || n < 1 || n > MAXLEAF) {
        fprintf(stderr, "n must be between 1 and %d\n", MAXLEAF);
        return 1;
    }
    HuffmanTree(HuffNode, n);

    /* Derive each leaf's code by walking from the leaf up to the root,
     * filling cd.bit from the back. */
    for (i = 0; i < n; i++) {
        cd.start = n - 1;
        c = i;
        p = HuffNode[c].parent;
        while (p != -1) {              /* a parent node exists */
            if (HuffNode[p].lchild == c)
                cd.bit[cd.start] = 0;
            else
                cd.bit[cd.start] = 1;
            cd.start--;                /* move to the next lower bit slot */
            c = p;
            p = HuffNode[c].parent;    /* set up the next iteration */
        }

        /* Save the code bits and start position for this leaf. */
        for (j = cd.start + 1; j < n; j++)
            HuffCode[i].bit[j] = cd.bit[j];
        HuffCode[i].start = cd.start;
    }

    /* Print every Huffman code. */
    for (i = 0; i < n; i++) {
        printf("%d's Huffman code is: ", i);
        for (j = HuffCode[i].start + 1; j < n; j++)
            printf("%d", HuffCode[i].bit[j]);
        printf(" start:%d", HuffCode[i].start);
        printf("\n");
    }

    printf("Decoding? Please Enter code:\n");
    /* The field width keeps the read inside pp. */
    if (scanf("%99s", pp) == 1)
        decodeing(pp, HuffNode, n);
    getchar();
    return 0;
}