1. Huffman coding
Purpose of the experiment:
(1). Use Huffman coding to encode a string.
Principle of the experiment
How to construct a Huffman tree:
(1). Build a priority queue in which the elements are ordered by weight, from smallest to largest.
(2). Remove the two nodes with the smallest weights from the queue and construct a new node whose weight is the sum of the weights of those two nodes.
(3). Insert the new node into the min-priority queue.
(4). Repeat steps 2 and 3 until only one node remains in the queue; that node is the root of the finished Huffman tree.
Steps of the experiment:
(1). Take an arbitrary input string and count how often each character occurs, storing the counts in an array named freqs.
(2). Pass the frequency array freqs as the argument to the create_huffman_codes function, which constructs the Huffman tree.
(3). Print each character together with its code, both as a decimal value and as a binary string.
Implementation:
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of distinct symbols (one per possible byte value). */
#define BYTES 256

/*
 * Code word of one symbol: the low `nbits` bits of `code`.
 * The highest of those bits is the branch taken at the root of the tree.
 */
struct huffcode {
    int nbits;
    int code;
};
typedef struct huffcode huffcode_t;

/*
 * Priority queue of tree-node indices, stored as an array kept in
 * descending order of frequency so that the smallest entry is at the end.
 */
struct huffheap {
    int *h;   /* node indices, largest frequency first */
    int n;    /* number of entries currently stored */
    int s;    /* allocated capacity, in entries */
    int cs;   /* growth step, in entries */
    long *f;  /* frequency table indexed by node index; not owned */
};
typedef struct huffheap heap_t;

/* heap handling funcs */

/*
 * Allocates an empty queue with room for `s` entries that orders its
 * entries by the frequencies in `f`.  Returns NULL if allocation fails.
 */
static heap_t *heap_create(int s, long *f)
{
    heap_t *heap = malloc(sizeof *heap);

    if (heap == NULL)
        return NULL;
    heap->h = malloc(sizeof *heap->h * (size_t)s);
    if (heap->h == NULL) {
        free(heap);
        return NULL;
    }
    heap->s = heap->cs = s;
    heap->n = 0;
    heap->f = f;
    return heap;
}

/* Releases a queue obtained from heap_create(); NULL is accepted. */
static void heap_destroy(heap_t *heap)
{
    if (heap == NULL)
        return;
    free(heap->h);
    free(heap);
}

/*
 * Inserts node index `c`, growing the array by `cs` entries when full.
 * Returns 0 on success, -1 if the array could not be grown (the queue is
 * left unchanged in that case).
 */
static int heap_add(heap_t *heap, int c)
{
    int pos;

    if (heap->n + 1 > heap->s) {
        /* Size the request in elements, and keep the old pointer valid
         * if realloc fails. */
        int *grown = realloc(heap->h,
                             sizeof *grown * (size_t)(heap->s + heap->cs));
        if (grown == NULL)
            return -1;
        heap->h = grown;
        heap->s += heap->cs;
    }

    /*
     * The array is already sorted, so one insertion pass is enough: move
     * the new entry towards the front past every strictly smaller
     * frequency.  Entries with an equal frequency stay in front of it.
     */
    pos = heap->n;
    while (pos > 0 && heap->f[heap->h[pos - 1]] < heap->f[c]) {
        heap->h[pos] = heap->h[pos - 1];
        pos--;
    }
    heap->h[pos] = c;
    heap->n++;
    return 0;
}

/*
 * Removes and returns the node index with the smallest frequency,
 * or -1 when the queue is empty.
 */
static int heap_remove(heap_t *heap)
{
    if (heap->n <= 0)
        return -1;
    heap->n--;
    return heap->h[heap->n];
}

/* Frees a table returned by create_huffman_codes(); NULL is accepted. */
void free_huffman_codes(huffcode_t **c)
{
    int i;

    if (c == NULL)
        return;
    for (i = 0; i < BYTES; i++)
        free(c[i]);
    free(c);
}

/* Huffman code generator */

/*
 * Builds the Huffman code of every symbol from its frequency.
 *
 * freqs: array of BYTES counts, indexed by byte value.
 *
 * Returns a newly allocated array of BYTES pointers.  Entry i is NULL when
 * freqs[i] <= 0, otherwise it points to the code of symbol i.  When exactly
 * one symbol has a positive count, that symbol gets a code of 0 bits.
 * Returns NULL if memory runs out or if a code would need more bits than
 * fit in the non-negative range of `int`.  Release the result with
 * free_huffman_codes().
 */
huffcode_t **create_huffman_codes(long *freqs)
{
    /* Longest code whose bits can all be set without overflowing `int`. */
    const int max_bits = (int)(sizeof(int) * CHAR_BIT) - 1;
    long efreqs[BYTES * 2];  /* leaf frequencies, then internal-node sums */
    int preds[BYTES * 2];    /* parent of each node; negative = 0-branch */
    int extf = BYTES;        /* index of the next internal node */
    huffcode_t **codes;
    heap_t *heap;
    int root;
    int i;

    memcpy(efreqs, freqs, sizeof(long) * BYTES);
    memset(&efreqs[BYTES], 0, sizeof(long) * BYTES);

    heap = heap_create(BYTES * 2, efreqs);
    if (heap == NULL)
        return NULL;

    for (i = 0; i < BYTES; i++) {
        if (efreqs[i] > 0 && heap_add(heap, i) != 0) {
            heap_destroy(heap);
            return NULL;
        }
    }

    /* Merge the two rarest nodes until a single root is left. */
    while (heap->n > 1) {
        int r1 = heap_remove(heap);
        int r2 = heap_remove(heap);

        efreqs[extf] = efreqs[r1] + efreqs[r2];
        if (heap_add(heap, extf) != 0) {
            heap_destroy(heap);
            return NULL;
        }
        preds[r1] = extf;   /* positive parent: this child is the 1-branch */
        preds[r2] = -extf;  /* negative parent: this child is the 0-branch */
        extf++;
    }

    /* The root is its own parent; with no symbols at all there is no root. */
    root = heap_remove(heap);
    if (root >= 0)
        preds[root] = root;
    heap_destroy(heap);

    codes = malloc(sizeof *codes * BYTES);
    if (codes == NULL)
        return NULL;
    for (i = 0; i < BYTES; i++)
        codes[i] = NULL;

    for (i = 0; i < BYTES; i++) {
        int bc = 0;  /* code bits collected so far */
        int bn = 0;  /* number of bits collected so far */
        int ix = i;

        /* Symbols that were never queued have no entry in preds. */
        if (efreqs[i] <= 0)
            continue;

        /* Climb from the leaf to the root, one bit per edge. */
        while (abs(preds[ix]) != ix) {
            if (bn >= max_bits) {
                free_huffman_codes(codes);
                return NULL;
            }
            if (preds[ix] >= 0)
                bc |= 1 << bn;
            ix = abs(preds[ix]);
            bn++;
        }

        codes[i] = malloc(sizeof *codes[i]);
        if (codes[i] == NULL) {
            free_huffman_codes(codes);
            return NULL;
        }
        codes[i]->nbits = bn;
        codes[i]->code = bc;
    }
    return codes;
}

/* Size of a buffer that receives a code rendered by inttobits(). */
#define MAXBITSPERCODE 100

/*
 * Writes the low `n` bits of `c` into `s` as '0'/'1' characters, most
 * significant bit first, followed by a terminating NUL.  `s` must have
 * room for n + 1 characters.
 */
void inttobits(int c, int n, char *s)
{
    s[n] = 0;
    while (n > 0) {
        s[n - 1] = (char)((c & 1) + '0');
        c >>= 1;
        n--;
    }
}

const char *test = "hellohellohello";
int main () {huffcode_t **r;
int i;
Char Strbit[maxbitspercode];
const char *p;
Long freqs[bytes];
memset (freqs, 0, sizeof freqs);
p = test;
while (*p!= ' ") freqs[*p++]++;
R = create_huffman_codes (FREQS);
for (i=0 i < BYTES; i++) {if (r[i)!= NULL) {inttobits (R[i]->code, R[i]->nbits, strbit);
printf ("%c (%d)%s\n", I, R[i]->code, strbit);
} free_huffman_codes (R);
return 0; }
The test string is "hellohellohello".
Run result:
e (1) 01
h (0) 000
l (1) 1
o (1) 001
If you want to use Huffman coding to compress text, store each character together with its code word as a dictionary in the compressed file, and then write the code words of all the characters of the text into that file; this is what achieves the compression. To decompress, look up each code word in the dictionary to recover the corresponding character. I am not yet familiar with file operations, so I will refine this when I have more time.