数据结构(哈夫曼树+KMP)之 数据加密+解密
原理:参考《趣学数据结构》一书
代码:
#include<stdio.h>
#include<stdlib.h>
/* Capacity used for the file's fixed-size arrays (code buffers, KMP table). */
#define N 100
/*
 * "Infinity" sentinel for minimum searches.
 * The previous definition `2^31-1` was not a power: in C `^` is bitwise XOR
 * and binds looser than `-`, so it evaluated to 2 ^ 30 == 28.
 * 2147483647 is the largest value of a 32-bit int.
 */
#define INF 2147483647
/* KMP fallback table read by KMP(); callers fill it via getNext(). 1-based. */
int next[N];
/* Total length of the encoded bit stream; written by findHuffManCodePath(). */
int Sum = 0;
/*
 * One node of the Huffman tree, stored in a flat array and linked by index.
 * (The fields were previously swallowed by a `//` comment on the opening
 * line, leaving the struct empty.)
 */
typedef struct fNode {
    int c;              /* symbol value carried by a leaf */
    int parent;         /* index of the parent node; -1 while the node has none */
    int lchild, rchild; /* indices of the left/right children; -1 for none */
    int weight;         /* occurrence count (leaf) or sum of the children's weights */
} fNode;
typedef struct rNode {//存储单个的编码字符的编码序列int r[N];int start;//有效编码起始的位置int length;
}rNode;
/*
 * Build a Huffman tree in place.
 *
 * fnode[0 .. n-1] are the leaves (weight set, parent == -1 on entry);
 * the n-1 internal nodes are written to fnode[n .. 2n-2], so the array must
 * hold at least 2n-1 elements. Each round merges the two parentless nodes
 * with the smallest weights.
 *
 * The two minima are tracked by index rather than by comparing against an
 * "infinite" sentinel, so the result no longer depends on the INF macro
 * (whose `2^31-1` spelling evaluates to 28 and made every weight >= 28
 * unselectable, leading to fnode[-1] accesses). Ties are still resolved in
 * favour of the node with the lower index, as before.
 */
void huffMan(fNode fnode[], int n) {
    for (int i = 0; i < n - 1; i++) {      /* n leaves need n-1 merges */
        int u = -1;                        /* index of the smallest root */
        int v = -1;                        /* index of the second smallest root */

        for (int j = 0; j < n + i; j++) {
            if (fnode[j].parent != -1) {
                continue;                  /* already merged into a subtree */
            }
            if (u == -1 || fnode[j].weight < fnode[u].weight) {
                v = u;                     /* old minimum becomes the runner-up */
                u = j;
            } else if (v == -1 || fnode[j].weight < fnode[v].weight) {
                v = j;
            }
        }

        /* With n >= 2 there are always at least two roots left here. */
        fnode[n + i].weight = fnode[u].weight + fnode[v].weight;
        fnode[n + i].lchild = u;
        fnode[n + i].rchild = v;
        fnode[u].parent = n + i;
        fnode[v].parent = n + i;
    }
}
/*
 * Derive the code word of every leaf and print the code table.
 *
 * For each leaf i (0 <= i < n) the tree is walked from the leaf up to the
 * root; a step coming from a left child contributes bit 0, any other step
 * bit 1. Bits are written right-aligned into rnode[i].r, ending at index
 * n-1, because a code can be at most n-1 bits long (degenerate chain).
 *
 * Side effects: fills rnode[i].r/start/length, prints the table to stdout
 * and stores the total encoded length (sum of weight * code length) in the
 * file-level variable Sum.
 *
 * Fix: rnode[i].start used to be assigned only inside the bit-copy loop, so
 * it stayed uninitialised for a zero-length code (single distinct symbol)
 * and was then read by the print loop. It is now always assigned.
 */
void findHuffManCodePath(fNode fnode[], rNode rnode[], int n) {
    for (int i = 0; i < n; i++) {
        int start = n - 1;                 /* next free slot, filled right to left */
        int child = i;

        for (int p = fnode[i].parent; p != -1; p = fnode[p].parent) {
            rnode[i].r[start--] = (child == fnode[p].lchild) ? 0 : 1;
            child = p;
        }

        rnode[i].start = start + 1;
        rnode[i].length = n - start - 1;
    }

    printf("enCode:\n");
    int sum = 0;
    for (int j = 0; j < n; j++) {
        printf("%d的哈夫曼编码为:", fnode[j].c);
        for (int k = rnode[j].start; k <= n - 1; k++) {
            printf("%d", rnode[j].r[k]);
        }
        sum += fnode[j].weight * rnode[j].length;
        printf(" ");
    }
    Sum = sum;
    printf("\n");
    printf("哈夫曼编码长度为:%d\n", sum);
    printf("\n");
}
/*
 * Compute the optimised KMP fallback table ("nextval") for a 1-based pattern.
 *
 * T    pattern; the pattern elements are T[1] .. T[m] (T[0] is not compared
 *      as part of the pattern).
 * next output table; next[1] .. next[m] are written. This parameter shadows
 *      the file-level array of the same name.
 * m    pattern length.
 *
 * next[j] is the pattern position to resume at after a mismatch at position
 * j; 0 means "advance the text and restart at position 1". When the element
 * at the fallback position equals the current one, the fallback's own entry
 * is reused, which avoids a comparison that is bound to fail again.
 *
 * Fix: the loop used to run while j <= m, which read T[m+1] and wrote
 * next[m+1] - one element past the pattern and past the table.
 */
void getNext(int *T, int *next, int m) {
    if (m < 1) {
        return;                            /* empty pattern: nothing to compute */
    }

    int j = 1, k = 0;
    next[1] = 0;
    while (j < m) {
        if (k == 0 || T[k] == T[j]) {
            ++j;
            ++k;
            /* Equal elements would mismatch again, so skip straight through. */
            next[j] = (T[k] == T[j]) ? next[k] : k;
        } else {
            k = next[k];                   /* fall back to a shorter border */
        }
    }
}
int KMP(int * S, int* T, int pos, int n, int m) {//KMP算法进行模式匹配并替换掉解码的数据int i = pos, j = 1;while (i <= n && j <= m) {//不能在这里使用i<=n-m+1,否则可能会破坏(截断)匹配成功if (i > n - m + 1 && j == 1) {break;//再减少一点比较的次数}if (j == 0 || S[i] == T[j]) {i++;j++;}else {j = next[j];//根据最大公共前缀和后缀计算的next数组,j回退而i不回退}}//printf("\n--- %d ---\n", j);if (j == m + 1) {//返回查找成功子串的初始位置 ==不能写成= 写>更安全//printf("查找成功子串的初始位置为:%d\n", i);//return i - j;return i;}//printf("查找子串失败!\n");return -1;
}
/* 错误的逻辑方式
void deCode(int* enCodes, fNode* fnode, rNode* rnode,int n) {//对编码的字符进行解码for (int i = 0; i <n; i++) {int t = 1, tt = n - 1 - rnode[i].start +1;int temp[N];temp[0] = -1;for (int k = rnode[i].start; k <= n - 1; k++) {temp[t++]=rnode[i].r[k];printf("%d", temp[t-1]);}printf("\n");getNext(temp, next, tt);//计算next数组(最大公共前缀和后缀长度)printf("\n");int bools;bools = KMP(enCodes, temp, 1, Sum, tt, fnode[i].c);//模式匹配while (true) {if (bools == -1) {//break;}else {bools = KMP(enCodes, temp, bools, Sum, tt, fnode[i].c);}printf("\n");}}for (int i = 1; i <= Sum; i++) {if (enCodes[i] != -1) {//解码的字符遍历printf("%d", enCodes[i]);}}printf("\n");
}*/
/*
 * Decode the bit stream enCodes[1 .. Sum] in place and print the result.
 *
 * enCodes 1-based bit stream; overwritten with the decoded symbols.
 * fnode   Huffman nodes; fnode[i].c is the symbol of leaf i.
 * rnode   code words; the bits of symbol i are rnode[i].r[start .. n-1].
 * n       number of distinct symbols.
 *
 * At each position ii every code word is tried with KMP; a code is accepted
 * only if it matches starting exactly at ii. The first slot of the matched
 * span receives the symbol, the remaining slots are set to -1, and printing
 * skips the -1 slots.
 *
 * Changes compared with the previous version:
 *  - the search window handed to KMP ends at ii + tt - 1, since only a match
 *    starting at ii is ever accepted; scanning the rest of the stream was
 *    wasted work and never changed the outcome;
 *  - zero-length codes are skipped and the loop stops when no code matches,
 *    both of which previously made the while loop spin forever.
 */
void deCode(int* enCodes, fNode* fnode, rNode* rnode, int n) {
    int ii = 1;                            /* index of the next undecoded bit */

    while (ii <= Sum) {
        int matched = 0;

        for (int i = 0; i < n; i++) {
            int tt = n - rnode[i].start;   /* length of this code word */
            if (tt <= 0) {
                continue;                  /* empty code can never make progress */
            }

            int limit = ii + tt - 1;       /* last bit a match at ii would cover */
            if (limit > Sum) {
                continue;                  /* not enough bits left for this code */
            }

            /* Copy the code word into a 1-based pattern buffer for KMP. */
            int temp[N];
            int t = 1;
            temp[0] = -1;
            for (int k = rnode[i].start; k <= n - 1; k++) {
                temp[t++] = rnode[i].r[k];
            }

            getNext(temp, next, tt);
            int bools = KMP(enCodes, temp, ii, limit, tt);
            if (bools != -1 && bools - ii == tt) {
                ii = bools;
                enCodes[ii - tt] = fnode[i].c;      /* decoded symbol */
                for (int kk = ii - 1; kk > ii - tt; kk--) {
                    enCodes[kk] = -1;               /* consumed bit, hidden on output */
                }
                matched = 1;
                break;                              /* go on with the next symbol */
            }
        }

        if (!matched) {
            /* Corrupt or truncated stream: bail out instead of looping forever. */
            fprintf(stderr, "deCode: no code matches at position %d\n", ii);
            break;
        }
    }

    for (int i = 1; i <= Sum; i++) {
        if (enCodes[i] != -1) {
            printf("%d", enCodes[i]);
        }
    }
    printf("\n");
}
/*
 * Read integers from stdin until -1, build a Huffman code for them, print
 * the code table, the encoded bit stream and the decoded result.
 *
 * Changes compared with the previous version:
 *  - at most N values are read, so numbers[]/copyNumbers[] cannot overflow;
 *  - the scanf result is checked, so EOF or non-numeric input ends the
 *    input loop instead of spinning forever;
 *  - standard scanf replaces scanf_s, which is an optional Annex K function
 *    missing from most non-MSVC toolchains (for "%d" the call is identical);
 *  - fnode has room for the 2k-1 nodes of a tree with k <= N leaves;
 *  - enCodes has room for the worst case of N symbols of N-1 bits each;
 *  - the encoder locates the code bits via rnode[].length, which is always
 *    assigned, instead of rnode[].start.
 */
int main(void) {
    printf("请输入要编码的数字:\n");

    fNode fnode[2 * N];                    /* leaves followed by internal nodes */
    rNode rnode[N];
    int numbers[N];                        /* input values in original order */
    int copyNumbers[N];                    /* scratch copy used while counting */
    static int enCodes[N * N + 1];         /* 1-based encoded bit stream */

    /* Read values; -1, EOF, invalid input or a full buffer ends the input. */
    int vNumber = 1, indexs = 0;
    while (indexs < N) {
        if (scanf("%d", &vNumber) != 1 || vNumber == -1) {
            break;
        }
        numbers[indexs] = vNumber;
        copyNumbers[indexs] = vNumber;
        indexs++;
    }

    /* Count occurrences; counted entries are marked with -1 in the copy,
     * which is safe because -1 can never be an input value. */
    int k = 0;
    for (int copyIndex = 0; copyIndex < indexs; copyIndex++) {
        if (copyNumbers[copyIndex] == -1) {
            continue;
        }
        int tempvv = copyNumbers[copyIndex];
        int counts = 0;
        for (int temIndex = copyIndex; temIndex < indexs; temIndex++) {
            if (copyNumbers[temIndex] == tempvv) {
                copyNumbers[temIndex] = -1;
                counts++;
            }
        }
        fnode[k].c = tempvv;
        fnode[k].weight = counts;
        k++;
        printf("%d:%d ", tempvv, counts);
    }
    printf("\n");

    int length = k;                        /* number of distinct symbols */
    int u = 2 * length - 1;                /* node count of the Huffman tree */
    for (int i = 0; i < u; i++) {
        fnode[i].lchild = -1;
        fnode[i].rchild = -1;
        fnode[i].parent = -1;
    }

    huffMan(fnode, length);
    findHuffManCodePath(fnode, rnode, length);

    printf("enSingleCode:\n");
    int starts = 1;
    enCodes[0] = -1;                       /* slot 0 is unused padding */
    for (int startsi = 0; startsi < indexs; startsi++) {
        int tempValue = numbers[startsi];
        int tti;
        for (tti = 0; tti < length; tti++) {
            if (tempValue == fnode[tti].c) {
                break;                     /* found the leaf of this symbol */
            }
        }
        /* Codes are right-aligned and end at index length-1. */
        for (int ttti = length - rnode[tti].length; ttti < length; ttti++) {
            enCodes[starts++] = rnode[tti].r[ttti];
            printf("%d", rnode[tti].r[ttti]);
        }
    }

    printf("\ndeCode:\n");
    deCode(enCodes, fnode, rnode, length);

    system("pause");
    return 0;
}
测试截图:
时间复杂度 $O(n^3)$,空间复杂度 $O(n)$(辅助数组)
彩蛋:
1. 后期将把整型数组改为字符数组,以减少内存消耗,也可以改写为模板类型。被注释掉的那一版 deCode 函数逻辑有问题,解码时会陷入死循环!感兴趣的读者可以在评论区交流,思考一下原因。
2.当然后期也将使用文件操作读文件加密和解密!
3.因为不同的编码方式不同,大家可以改进,不要每次都左树为0,右树为1,这样很容易被别人解密,可以适当地调整不同层的子树根节点的左右01编码加密方式,当然也可以是其他数字或字符作为左右加密符号!!!
打印调试了2天代码!!!