You may wonder why you would write your own compression code when the platform and third-party libraries already provide plenty of compression tools. It is true that tools such as gzip exist, but in some cases (for example, when the text is short and its characters rarely repeat) the gzip-compressed output is actually larger than the original text. In such special cases, a compression method of your own can do better.
You may have forgotten the Huffman coding you learned in school; you can first read up on it in Baidu Encyclopedia: http://baike.baidu.com/view/127820.htm
The idea behind Huffman coding: count how often each character occurs in the text to obtain each character's weight, then build an optimal binary tree (also called a Huffman tree) from those weights, and give every leaf node a code made up of bits, such that no code is a prefix of any other code. The codes are then concatenated, and every 8 bits are packed into one byte.
The code is as follows:
Package Com.huffman;
/**
* Knot Point
* @author Davee
*/
public class Node implements comparable<node> {
int weight;//Weight Value
Node leftchild;//left child node
Node rightchild;//right child node
String Huffcode;
Whether private Boolean isleaf;//is a leaf
Character value;
Public Node (Character value, int weight) {
This.value = value;
This.weight = weight;
This.isleaf = true;
}
public Node (int weight, node leftchild, node Rightchild) {
This.weight = weight;
This.leftchild = Leftchild;
This.rightchild = Rightchild;
}
public void increaseweight (int i) {
Weight = i;
}
public boolean isleaf () {
return isleaf;
}
@Override
public int compareTo (Node o) {
return this.weight-o.weight;
}
}
The code is as follows:
Package Com.huffman;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
public class Huffmantree {
Private Boolean debug = false;
Private Hashmap<character, node> Nodemap;
Private Arraylist<node> nodelist;
Public Huffmantree () {
Nodemap = new Hashmap<character, node> ();
nodelist = new arraylist<node> ();
}
public void Setdebug (Boolean debug) {
This.debug = Debug;
}
Public String decode (map<string, character> codetable, string binary) {
int begin = 0, end = 1, Count = Binary.length ();
StringBuffer sb = new StringBuffer ();
while (end <= count) {
String key = Binary.substring (begin, end);
if (Codetable.containskey (key)) {
Sb.append (Codetable.get (key));
begin = END;
} else {
}
end++;
}
return sb.tostring ();
}
public string Encode (string origintext) {
if (Origintext = null) return null;
Calculateweight (Origintext);
if (Debug) printnodes (nodelist);
Node root = Generatehuffmantree (nodelist);
Generatehuffmancode (Root, "");
if (Debug) printnodes (root);
StringBuffer sb = new StringBuffer ();
For (Character Key:originText.toCharArray ()) {
Sb.append (Nodemap.get (key). Huffcode);
}
if (Debug) System.out.println ("Binary:" +sb.tostring ());
return sb.tostring ();
}
/**
* Calculate the leaf weight value
* @param text
*/
private void Calculateweight (String text) {
For (Character C:text.tochararray ()) {
if (Nodemap.containskey (c)) {
Nodemap.get (c). Increaseweight (1);//Weight plus 1
} else {
Node Leafnode = new Node (c, 1);
Nodelist.add (Leafnode);
Nodemap.put (c, Leafnode);
}
}
}
/**
* Generate Huffman Tree
* @param nodes
*/
Private Node Generatehuffmantree (arraylist<node> nodes) {
Collections.sort (nodes);
while (Nodes.size () > 1) {
Node ln = nodes.remove (0);
Node rn = nodes.remove (0);
Insertsort (nodes, new Node (Ln.weight + rn.weight, LN, RN));
}
Node root = nodes.remove (0);
nodes = null;
return root;
}
/**
* Insert Sort
* @param sortednodes
* @param node
*/
private void Insertsort (arraylist<node> sortednodes, node node) {
if (sortednodes = null) return;
int weight = Node.weight;
int min = 0, max = Sortednodes.size ();
int index;
if (sortednodes.size () = = 0) {
index = 0;
else if (Weight < Sortednodes.get (min). Weight) {
index = min;//inserted into the first
else if (weight >= sortednodes.get (max-1). Weight) {
index = max;//Insert to Last
} else {
index = MAX/2;
for (int i=0, COUNT=MAX/2 i<=count; i++) {
if (weight >= sortednodes.get (index-1). Weight && Weight < Sortednodes.get (index). Weight) {
Break
else if (weight < Sortednodes.get (index). Weight) {
max = index;
} else {
min = index;
}
Index = (max + min)/2;
}
}
Sortednodes.add (Index, node);
}
private void Generatehuffmancode (node node, String code) {
if (Node.isleaf ()) Node.huffcode = code;
else {
Generatehuffmancode (Node.leftchild, code + "0");
Generatehuffmancode (Node.rightchild, code + "1");
}
}
/**
* Generate Code table
* @return
*/
Public map<string, Character> getcodetable () {
map<string, character> map = new hashmap<string, character> ();
For (Node node:nodeMap.values ()) {
Map.put (Node.huffcode, Node.value);
}
return map;
}
/**
* Print node information
* @param root
*/
private void Printnodes (Node root) {
System.out.println ("character Fu Quan value of the Code");
Printtree (root);
}
private void Printtree (Node root) {
if (Root.isleaf ()) System.out.println (Root.value = null? "": Root.value) + "" +root.weight+ "" + (Root.huffcode = null? "": Root.huffcode));
if (root.leftchild!= null) printtree (root.leftchild);
if (root.rightchild!= null) printtree (root.rightchild);
}
/**
* Print node information
* @param nodes
*/
private void Printnodes (arraylist<node> nodes) {
System.out.println ("character Fu Quan value of the Code");
for (Node node:nodes) {
System.out.println (node.value+ "" +node.weight+ "" +node.huffcode);
}
}
}
The code is as follows:
Package com.test;
Import Java.util.Map;
Import Com.huffman.HuffUtils;
Import Com.huffman.HuffmanTree;
public class Test {
public static void Main (string[] args) {
String origintext = "Abcdacaha";
Huffmantree huffmantree = new Huffmantree ();
Huffmantree.setdebug (TRUE);//test
String binary = Huffmantree.encode (Origintext);
byte[] bytes = huffutils.binary2bytes (binary);
map<string, character> codetable = huffmantree.getcodetable ();
int lastbytenum = binary.length ()% 8;
System.out.println (bytes.length);
Pass bytes, codetable, lastbytenum to the server side
Omitted......
/*
Server-Side resolution
Receive parameters and convert to bytes, Relationmap, Lastbytenum
*/
String fullbinary = huffutils.bytes2binary (bytes, lastbytenum);
SYSTEM.OUT.PRINTLN ("Server binary:" +fullbinary);
String Retrievetext = Huffmantree.decode (codetable, fullbinary);
System.out.println ("Resuming text:" +retrievetext);
}
}