基于哈夫曼编码完成的文件压缩及解压

这几天在较为认真的研究基于哈夫曼编码的文件压缩及解压，费了点时间，在这分享一下：

这里用链式结构，非顺序表结构；

文件压缩：

1.获取文件信息（这里采用TXT格式文本）；

2.压缩文件；

3.写配置文件（便于解压时用，无非就是存放原文件的索引之类的，比如说，文件中某个字符出现的个数，记录下来）

4.解压缩，使用压缩后的文件和配置文件解压文件；

5.用比对软件，比对解压后的文件和源文件是否相同；

下面慢慢解析：

先看一个文件信息类：

using LongType = long long;

// Per-byte statistics used while building the Huffman tree.
struct FileInfo
{
    unsigned char _ch;   // the byte value this record describes
    LongType _count;     // number of occurrences of the byte
    std::string _code;   // Huffman code of the byte as a string of '0'/'1'

    FileInfo(unsigned char ch = 0)
        : _ch(ch)
        , _count(0)
    {}

    // Sum of two records: only the counts are added; the result keeps the
    // default byte value (0) and an empty code.
    FileInfo operator+(const FileInfo& x) const
    {
        FileInfo tmp;
        tmp._count = this->_count + x._count;
        return tmp;
    }

    // Two records differ when their occurrence counts differ; the byte value
    // and the code are not compared.
    bool operator!=(const FileInfo& x) const
    {
        return this->_count != x._count;
    }
};

// Orders records by occurrence count. Arguments are taken by const reference
// so that a comparison does not copy the code strings.
inline bool operator<(const FileInfo& info1, const FileInfo& info2)
{
    return info1._count < info2._count;
}

此为一个文件信息的类结构，包含字符、字符出现的次数，以及这个字符对应的哈夫曼编码（能看到这篇博客的兄弟，对哈夫曼编码应该不会陌生，这里不再赘述）

除了统计字符出现的次数及哈夫曼编码，还完成了几个运算符的重载

要获取哈夫曼编码，就得先建立哈夫曼树；建立哈夫曼树时用最小堆来选取权值最小的节点，以下是最小堆的实现

// Binary heap with pluggable ordering (min-heap by default).

// Ordering functor: "l comes before r" when l < r (min-heap).
template<class T>
struct Less
{
    bool operator()(const T& l, const T& r) const
    {
        return l < r;
    }
};

// Pointer elements are ordered by the objects they point to, not by address.
template<class T>
struct Less<T*>
{
    bool operator()(T* l, T* r) const
    {
        return *l < *r;
    }
};

// Ordering functor: "l comes before r" when l > r (max-heap).
template<class T>
struct Greater
{
    bool operator()(const T& l, const T& r) const
    {
        return l > r;
    }
};

// Pointer elements are ordered by the objects they point to, not by address.
template<class T>
struct Greater<T*>
{
    bool operator()(T* l, T* r) const
    {
        return *l > *r;
    }
};

// Array-backed binary heap. Compare()(a, b) == true means a must sit closer
// to the top than b.
template<class T, class Compare = Less<T> >
class Heap
{
public:
    Heap()
    {}

    // Builds a heap from the first `size` elements of `a`.
    Heap(const T* a, size_t size)
    {
        _arrays.reserve(size);
        for (size_t i = 0; i < size; ++i)
        {
            _arrays.push_back(a[i]);
        }

        // Sift down every internal node, starting from the last one. The
        // index is computed in signed arithmetic so that sizes 0 and 1 do
        // not wrap around.
        for (int i = (static_cast<int>(_arrays.size()) - 2) / 2; i >= 0; --i)
        {
            AdjustDown(i);
        }
    }

    // Inserts x and restores the heap property.
    void Push(const T& x)
    {
        _arrays.push_back(x);
        AdjustUp(static_cast<int>(_arrays.size()) - 1);
    }

    // Removes the top element. The heap must not be empty.
    void Pop()
    {
        assert(_arrays.size() > 0);
        using std::swap;
        swap(_arrays[0], _arrays[_arrays.size() - 1]);
        _arrays.pop_back();
        AdjustDown(0);
    }

    // Returns the top element. The heap must not be empty.
    T& Top()
    {
        assert(_arrays.size() > 0);
        return _arrays[0];
    }

    bool Empty() const
    {
        return _arrays.empty();
    }

    int Size() const
    {
        return static_cast<int>(_arrays.size());
    }

    // Moves the element at index `root` down until neither child must
    // precede it.
    void AdjustDown(int root)
    {
        Compare com;
        const size_t count = _arrays.size();
        size_t parent = static_cast<size_t>(root);
        size_t child = parent * 2 + 1;
        while (child < count)
        {
            // Pick whichever child must come first.
            if (child + 1 < count && com(_arrays[child + 1], _arrays[child]))
            {
                ++child;
            }
            if (!com(_arrays[child], _arrays[parent]))
            {
                break;
            }
            using std::swap;
            swap(_arrays[child], _arrays[parent]);
            parent = child;
            child = 2 * parent + 1;
        }
    }

    // Moves the element at index `child` up while it must precede its parent.
    void AdjustUp(int child)
    {
        Compare com;
        while (child > 0)
        {
            const int parent = (child - 1) / 2;
            if (!com(_arrays[child], _arrays[parent]))
            {
                break;
            }
            using std::swap;
            swap(_arrays[parent], _arrays[child]);
            child = parent;
        }
    }

public:
    std::vector<T> _arrays;   // heap storage; index 0 is the top
};

最小堆里也完成了很多接口，包括push pop等

然后就是几个压缩和解压的函数接口

1.根据哈夫曼树获取哈夫曼编码：

	// Walks the tree rooted at `root` and, for every leaf, builds the Huffman
	// code of the leaf's byte into _infos[byte]._code.
	// The code is collected leaf-to-root ('1' when the node is its parent's
	// left child, '0' when it is the right child) and then reversed so that it
	// reads root-to-leaf. The whole string is reversed, so _code is expected
	// to be empty on entry.
	void _GenerateHuffmanCode(HuffmanTreeNode<FileInfo>* root)
	{
		if (!root)
		{
			return;
		}

		// Handle both subtrees first.
		_GenerateHuffmanCode(root->_left);
		_GenerateHuffmanCode(root->_right);

		// Only leaves carry a byte, so only leaves get a code.
		const bool isLeaf = !root->_left && !root->_right;
		if (!isLeaf)
		{
			return;
		}

		string& bits = _infos[root->_weight._ch]._code;
		HuffmanTreeNode<FileInfo>* node = root;
		for (HuffmanTreeNode<FileInfo>* up = node->_parent; up != nullptr; up = node->_parent)
		{
			if (up->_left == node)
			{
				bits += '1';
			}
			else if (up->_right == node)
			{
				bits += '0';
			}
			node = up;
		}
		reverse(bits.begin(), bits.end());
	}

2.根据最小堆建立哈夫曼树；

// Builds the Huffman tree from the `size` weights in `a`, skipping every entry
// that compares equal to `invalid`, and stores the result in _root.
// When no entry is valid (for example an empty input file) there is nothing to
// build and _root is set to nullptr instead of reading from an empty heap.
void CreateTree(T *a, size_t size, const T& invalid)
{
    assert(a);

    // The heap stores node pointers, so its element type must be the pointer
    // type; the heap orders the pointers by the nodes they point to.
    Heap<HuffmanTreeNode<T>*> s1;
    for (size_t i = 0; i < size; ++i)
    {
        if (a[i] != invalid)
        {
            s1.Push(new HuffmanTreeNode<T>(a[i]));
        }
    }

    if (s1.Empty())
    {
        _root = nullptr;
        return;
    }

    // Repeatedly take the two nodes at the top of the heap and join them under
    // a new parent whose weight is their sum, until one node is left.
    while (s1.Size() > 1)
    {
        HuffmanTreeNode<T>* left = s1.Top();
        s1.Pop();
        HuffmanTreeNode<T>* right = s1.Top();
        s1.Pop();

        HuffmanTreeNode<T>* parent = new HuffmanTreeNode<T>(left->_weight + right->_weight);
        parent->_left = left;
        parent->_right = right;
        left->_parent = parent;
        right->_parent = parent;
        s1.Push(parent);
    }

    _root = s1.Top();
    s1.Pop();
}

3.读取文本文件中的一行：

	// Reads one "<byte>,<count>" record from the configuration file and appends
	// it to `line` without the terminating '\n'.
	// A record whose byte is '\n' itself starts with a newline, so a leading
	// '\n' is kept as part of the record.
	// Returns false when nothing could be read (end of file or read error).
	// The last record is returned even if the file does not end with '\n';
	// the result of fgetc is kept in an int so that EOF is detected reliably.
	bool _ReadLine(FILE *fOutLogFile, std::string& line)
	{
		int ch = fgetc(fOutLogFile);
		if (ch == EOF)
		{
			return false;
		}

		if (ch == '\n')
		{
			line += static_cast<char>(ch);
			ch = fgetc(fOutLogFile);
		}

		while (ch != EOF && ch != '\n')
		{
			line += static_cast<char>(ch);
			ch = fgetc(fOutLogFile);
		}
		return true;
	}

4.文件压缩

	//文件压缩bool Compress(const char* filename){//1.打开一个文件，统计文件字符出现的次数//2.生成对应的哈弗曼编码//3.压缩文件//4.写配置文件，方便解压缩assert(filename);FILE *fOut = fopen(filename, "rb");assert(fOut);//统计文件字符出现的次数unsigned char ch = fgetc(fOut);while (!feof(fOut))  //文件结束{_infos[ch]._count++;ch = fgetc(fOut);}HuffmanTree<FileInfo> ht;FileInfo invalid;ht.CreateTree(_infos, 256, invalid);//哈夫曼编码_GenerateHuffmanCode(ht.GetRoot());string compressFile = filename;compressFile += ".huf";//压缩后的文件名 后缀为《输入文件名+.huf》FILE *finCompress = fopen(compressFile.c_str(), "wb"); //获取string中的C字符串assert(finCompress);fseek(fOut, 0, SEEK_SET);//将文件指针移到开头char cha = fgetc(fOut);unsigned char inch = 0;int index = 0;  //一个字节的八位while (!feof(fOut)){string& code = _infos[(unsigned char)cha]._code;for (size_t i = 0; i < code.size(); ++i){inch <<= 1;     //低位向高位进if (code[i] == '1'){inch |= 1;}if (++index == 8){fputc(inch, finCompress); //够8位，装进文件index = 0;   //重新一轮开始inch = 0;}}cha = fgetc(fOut);}fclose(fOut);//如果index = 0 说明 上边8位刚好存满 不等 下一个自己又出来了if (index != 0)   //处理最后一个字符不够的问题{inch <<= (8 - index); //最高位必须装上 后边的浪费掉fputc(inch, finCompress);}fclose(finCompress);}

5.写配置文件：

string logFile = filename;logFile += ".log";FILE *Log = fopen(logFile.c_str(), "wb");assert(Log);string chInfo;char str[128] = {0}; //没空间 不可以for (size_t i = 1; i < 256; ++i){if (_infos[i]._count > 0){chInfo += _infos[i]._ch;chInfo += ',';chInfo += _itoa(_infos[i]._count,str,10);chInfo += '\n';fputs(chInfo.c_str(), Log);chInfo.clear();}}fclose(Log);

6.最后的文件解压：

//重构文件void _RestoreFiles(HuffmanTreeNode<FileInfo> *root, const char* Fileneme,long long size){assert(root);//原压缩文件string name = Fileneme;name += ".huf";FILE* Out = fopen(name.c_str(),"rb");assert(Out);string restorefilename = Fileneme;restorefilename += ".over";FILE *over = fopen(restorefilename.c_str(),"wb");assert(over);int pos = 8;long long poss = size;unsigned char chz = fgetc(Out);while (poss>0){HuffmanTreeNode<FileInfo>* cur = nullptr;cur = root;while (cur->_left != nullptr || cur->_right != nullptr){pos--;unsigned char temp = chz >> pos;int ch = 1 & temp;if (ch == 0){cur = cur->_right;}else if (ch == 1){cur = cur->_left;}if (pos == 0){chz = fgetc(Out);pos = 8;}}fputc(cur->_weight._ch, over);poss--;}fclose(Out);fclose(over);}void UnCompress(const char* Fileneme)//解压缩{//1.打开日志文件//2.根据信息还原哈夫曼树//3.还原信息；string UnCompressneme = Fileneme;UnCompressneme += ".log";FILE *fOutLogFile = fopen(UnCompressneme.c_str(), "rb");assert(fOutLogFile);string line;while (_ReadLine(fOutLogFile, line)){unsigned char ch = line[0];_infos[ch]._count = atoi(line.substr(2).c_str());line.clear();} HuffmanTree<FileInfo> f;FileInfo invalid;f.CreateTree(_infos, 256, invalid);//根据重建的哈夫曼树 还原文件；long long size = f.GetRoot()->_weight._count;_RestoreFiles(f.GetRoot(), Fileneme,size);}

到此，此项目基本完成；如遇问题，希望留言，随时解答，如有见解，跪求赐教！

转载于:https://www.cnblogs.com/li-ning/p/9490022.html

本文来自互联网用户投稿，该文观点仅代表作者本人，不代表本站立场。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如若转载，请注明出处：http://www.mzph.cn/news/490842.shtml

如若内容造成侵权/违法违规/事实不符，请联系多彩编程网进行投诉反馈email:809451989@qq.com，一经查实，立即删除！