I made my Huffman coding tree deploying C++ shown as below:
I created three class for this purpose - HuffmanTree, BinTree and BinNode.
You can see more details on my GitHub:https://github.com/MouChiaHung/DataStructures
Check these three files: bin_node.h, bin_tree.h and huffman_tree.h. They read source file "source", encode to file "encode" in the Huffman way, then decode the file "encode" and store results to output file "decode". Besides, Huffman table is recorded in file "table".
One of core functions is HuffmanTree::encode() which read characters from source file.
template<typename T> void amo::HuffmanTree<T>::grow(std::list<Model*>& list) { //ascendantly sorted list
Model* l;
Model* r;
Model* m;
BinNode<T>* lchild;
BinNode<T>* rchild;
BinNode<T>* vertex;
std::list<Model*>::iterator it = list.begin();
std::vector<BinNode<T>*> subs; //roots of sub-trees
typename std::vector<BinNode<T>*>::iterator it_subs = subs.begin();
int i = 0;
while (it!=list.end()) {
lchild = NULL;
rchild = NULL;
vertex = NULL;
cout << YELLOW << "while-loop:" << ++i << WHITE << endl;
if (std::next(it,1) == list.end()) { //met the last and single leaf or sub-tree
if (subs.size() > 1) {
cout << RED << "size of sub-tree is more than 1:" << subs.size() << WHITE << endl;
this->_root = subs.back();
subs.pop_back();
break;
}
else if (subs.size() == 1){
if (**it == subs.back()->data) { //met the last sub-tree
cout << GREEN << "going to attach the last sub-tree" << WHITE << endl;
vertex = subs.back();
subs.pop_back();
}
else { //met the last leaf
cout << GREEN << "going to attach the last leaf" << WHITE << endl;
r = *it;
lchild = subs.back();
subs.pop_back();
cout << CYAN << "lchild points to the root of the last sub-tree:" << *lchild;
rchild = new BinNode<T>(*r);
cout << CYAN << "rchild points to a new node:" << *rchild;
m = new Model(CHAR_VERTEX, (lchild->data.prob)+(r->prob));
vertex = new BinNode<T>(*m);
lchild->parent = vertex;
rchild->parent = vertex;
vertex->lchild = lchild;
vertex->rchild = rchild;
}
this->_root = vertex;
cout << CYAN << "root:" << *this->_root << WHITE << endl;
break;
}
else {
cout << RED << "size of sub-tree is less than 1:" << subs.size() << WHITE << endl;
this->_root = subs.back();
subs.pop_back();
break;
}
}
else {
l = *it;
it++;
r = *it;
m = new Model(CHAR_VERTEX, l->prob+r->prob);
for (it_subs=subs.begin(); it_subs!=subs.end(); it_subs++) { //set lchild if any sub-tree corresponds with this l model iterated currently
if (*l == (*it_subs)->data) {
cout << CYAN << "lchild points to the root of sub-tree:" << **it_subs;
lchild = *it_subs;
--(it_subs = subs.erase(it_subs));
}
if (lchild != NULL) break; //tricky but important
}
for (it_subs=subs.begin(); it_subs!=subs.end(); it_subs++) { //set rchild if any sub-tree corresponds with this r model iterated currently
if (*r == (*it_subs)->data) {
cout << CYAN << "rchild points to the root of sub-tree:" << **it_subs;
rchild = *it_subs;
--(it_subs = subs.erase(it_subs));
}
if (rchild != NULL) break; //tricky but important
}
if (lchild == NULL) { //set lchild with a new node if no any sub-tree corresponds with this l model iterated currently, which means meeting a row leaf
lchild = new BinNode<T>(*l);
cout << CYAN << "lchild points to a new node:" << *lchild;
}
if (rchild == NULL) { //set rchild with a new node if no any sub-tree corresponds with this r model iterated currently, which means meeting a row leaf
rchild = new BinNode<T>(*r);
cout << CYAN << "rchild points to a new node:" << *rchild;
}
vertex = new BinNode<T>(*m);
std::cout << GREEN << "growing..." << WHITE << endl;
std::cout << CYAN << "lchild" << *lchild << WHITE;
std::cout << CYAN << "rchild" << *rchild << WHITE;
std::cout << CYAN << "vertex" << *vertex << WHITE;
lchild->parent = vertex;
rchild->parent = vertex;
vertex->lchild = lchild;
vertex->rchild = rchild;
subs.push_back(vertex);
for (std::list<Model*>::iterator itt=it;itt!=list.end();itt++) {
if ((*m < **itt) || (*m == **itt)) {
list.insert(itt, m);
break;
}
else if (std::next(itt,1) == list.end()) {
list.push_back(m);
break;
}
}
it++;
}
}
this->updateHeightAll();
cout << GREEN << "-*-*-*-*-*-*-*-* Huffman tree top -*-*-*-*-*-*-*-*" << WHITE << endl;
this->traverseLevel();
cout << GREEN << "-*-*-*-*-*-*-*-* Huffman tree bottom -*-*-*-*-*-*-*-*" << WHITE << endl;
subs.clear();}
Another core function is Huffman::grow() which makes a binary tree for PFC coding.
template<typename T> void amo::HuffmanTree<T>::grow(std::list<Model*>& list) { //ascendantly sorted list
Model* l;
Model* r;
Model* m;
BinNode<T>* lchild;
BinNode<T>* rchild;
BinNode<T>* vertex;
std::list<Model*>::iterator it = list.begin();
std::vector<BinNode<T>*> subs; //roots of sub-trees
typename std::vector<BinNode<T>*>::iterator it_subs = subs.begin();
int i = 0;
while (it!=list.end()) {
lchild = NULL;
rchild = NULL;
vertex = NULL;
cout << YELLOW << "while-loop:" << ++i << WHITE << endl;
if (std::next(it,1) == list.end()) { //met the last and single leaf or sub-tree
if (subs.size() > 1) {
cout << RED << "size of sub-tree is more than 1:" << subs.size() << WHITE << endl;
this->_root = subs.back();
subs.pop_back();
break;
}
else if (subs.size() == 1){
if (**it == subs.back()->data) { //met the last sub-tree
cout << GREEN << "going to attach the last sub-tree" << WHITE << endl;
vertex = subs.back();
subs.pop_back();
}
else { //met the last leaf
cout << GREEN << "going to attach the last leaf" << WHITE << endl;
r = *it;
lchild = subs.back();
subs.pop_back();
cout << CYAN << "lchild points to the root of the last sub-tree:" << *lchild;
rchild = new BinNode<T>(*r);
cout << CYAN << "rchild points to a new node:" << *rchild;
m = new Model(CHAR_VERTEX, (lchild->data.prob)+(r->prob));
vertex = new BinNode<T>(*m);
lchild->parent = vertex;
rchild->parent = vertex;
vertex->lchild = lchild;
vertex->rchild = rchild;
}
this->_root = vertex;
cout << CYAN << "root:" << *this->_root << WHITE << endl;
break;
}
else {
cout << RED << "size of sub-tree is less than 1:" << subs.size() << WHITE << endl;
this->_root = subs.back();
subs.pop_back();
break;
}
}
else {
l = *it;
it++;
r = *it;
m = new Model(CHAR_VERTEX, l->prob+r->prob);
for (it_subs=subs.begin(); it_subs!=subs.end(); it_subs++) { //set lchild if any sub-tree corresponds with this l model iterated currently
if (*l == (*it_subs)->data) {
cout << CYAN << "lchild points to the root of sub-tree:" << **it_subs;
lchild = *it_subs;
--(it_subs = subs.erase(it_subs));
}
if (lchild != NULL) break; //tricky but important
}
for (it_subs=subs.begin(); it_subs!=subs.end(); it_subs++) { //set rchild if any sub-tree corresponds with this r model iterated currently
if (*r == (*it_subs)->data) {
cout << CYAN << "rchild points to the root of sub-tree:" << **it_subs;
rchild = *it_subs;
--(it_subs = subs.erase(it_subs));
}
if (rchild != NULL) break; //tricky but important
}
if (lchild == NULL) { //set lchild with a new node if no any sub-tree corresponds with this l model iterated currently, which means meeting a row leaf
lchild = new BinNode<T>(*l);
cout << CYAN << "lchild points to a new node:" << *lchild;
}
if (rchild == NULL) { //set rchild with a new node if no any sub-tree corresponds with this r model iterated currently, which means meeting a row leaf
rchild = new BinNode<T>(*r);
cout << CYAN << "rchild points to a new node:" << *rchild;
}
vertex = new BinNode<T>(*m);
std::cout << GREEN << "growing..." << WHITE << endl;
std::cout << CYAN << "lchild" << *lchild << WHITE;
std::cout << CYAN << "rchild" << *rchild << WHITE;
std::cout << CYAN << "vertex" << *vertex << WHITE;
lchild->parent = vertex;
rchild->parent = vertex;
vertex->lchild = lchild;
vertex->rchild = rchild;
subs.push_back(vertex);
for (std::list<Model*>::iterator itt=it;itt!=list.end();itt++) {
if ((*m < **itt) || (*m == **itt)) {
list.insert(itt, m);
break;
}
else if (std::next(itt,1) == list.end()) {
list.push_back(m);
break;
}
}
it++;
}
}
this->updateHeightAll();
cout << GREEN << "-*-*-*-*-*-*-*-* Huffman tree top -*-*-*-*-*-*-*-*" << WHITE << endl;
this->traverseLevel();
cout << GREEN << "-*-*-*-*-*-*-*-* Huffman tree bottom -*-*-*-*-*-*-*-*" << WHITE << endl;
subs.clear();}
And Huffman::generate() creates table for coding content.
template<typename T> void amo::HuffmanTree<T>::generate() {
std::string code = "";
std::queue<BinNode<T>*> queue;
BinNode<T>* node = this->_root;
BinNode<T>* tmp;
queue.push(node);
int i = 0;
while (true) {
if (queue.empty()) break;
node = queue.front();
queue.pop();
cout << YELLOW << "while-loop:" << ++i << ", node:" << *node << WHITE << endl;
if (node->data.c == CHAR_VERTEX) {
//do nothing
}
else {
if (node->isLeaf()) code = "";
tmp = node;
while (tmp!=NULL) {
if (tmp->isLeftChild()) code.insert(0, "0");
else if (tmp->isRightChild()) code.insert(0, "1");
tmp = tmp->parent;
}
if (node->data.c != CHAR_VERTEX) codes[node->data.c] = code;
}
if (node->hasLeftChild()) queue.push(node->lchild);
if (node->hasRightChild()) queue.push(node->rchild);
}
for (std::map<char,string>::iterator it=codes.begin();it!=codes.end();it++) {
cout << YELLOW << "codes[" << distance(codes.begin(),it) << "]:" << " key:" << it->first << " => value:" << it->second << WHITE << endl;
}}
Thanks, any suggestion is welcome.