学习来源
学习贺利坚老师博客
数据结构例程——串的模式匹配(KMP算法)_数据结构模式匹配例题-CSDN博客
本人引导博客
串的匹配 (KPM算法由来引导)_kpm匹配失败-CSDN博客
转载大佬sofu博客
https://www.cnblogs.com/dusf/p/kmp.html
本人详细思路引导b战讲解系列视频
【KMP算法从常规思维到解题思路(全网最全,感谢博主: sofu6)】https://www.bilibili.com/video/BV1k8411h7oV?vd_source=1de4617d8462b141bb53b6be82555d31
版本更新日志:
V1.0: 综合贺利坚老师讲解思路, 结合sofu大佬博客的递归模拟理解, 我再结合前年的博客引导, 对之前的代码进行命名优化, 加入了顺序串算法库, 让算法可视化, 让字符串更容易处理.
V1.0
功能函数
数组索引-求最大公共前后缀索引
/**************************************************
函数名: Get_index
功 能: 得到Kmp算法的模式串的数组索引
参 数: (1)Sequential_string model_String:要进行求数组索引的模式串(2)int index_array[]: 存储数组索引
注 意: 本质就是求最长公共前后缀
返回值: 无
**************************************************/
void Get_index(Sequential_string model_String, int index_array[])
{int counter = 0; //计数器int index_counter = -1; //索引计数器index_array[0] = -1; //特殊初始索引//计数器控制长度while(counter < model_String.length-1){//递归模拟对比,直到开头,加入索引,或者在前一个串的基础上,进行判断相同后,累加if(index_counter == -1 || model_String.Sequential_string_data[counter] == model_String.Sequential_string_data[index_counter]){counter++;index_counter++;index_array[counter] = index_counter;}else{index_counter = index_array[index_counter];}}
}
利用索引, KMP算法, 寻找子串在母串中的位置
/**************************************************
函数名: Find_matching_location_KMP
功 能: 利用数组索引, 实现KMP算法, 在母串中,找到子串在母串中的位置
参 数: (1)Sequential_string Mother_string:母串(2)Sequential_string Son_string:子串
思 路: 利用KMp算法, 母串计数器不回退, 子串利用数组索引,进行追忆跳跃检索对比
返回值: int: 子串在母串数组中的开始位置(-1不存在,其他为位序)
**************************************************/
int Find_matching_location_KMP(Sequential_string Mother_string, Sequential_string Son_string)
{int next_index[index_MaxSize];//索引数组int Moter_counter = 0; //母串计数器int Son_counter = 0; //子串计数器//调用索引函数Get_index(Son_string,next_index);//当母串遍历完或者子串遍历完,跳出(通过子串是否遍历完,判断是否找到)while(Moter_counter < Mother_string.length && Son_counter < Son_string.length){//当索引为-1,则代表, 要从子串开头进行对比,母串则不回退累加//当是子串和母串字符相等,则也进行累加判断if(Son_counter== -1 || Mother_string.Sequential_string_data[Moter_counter] == Son_string.Sequential_string_data[Son_counter]){Moter_counter++;Son_counter++;}else{Son_counter = next_index[Son_counter];}}//通过判断子串是否遍历完,if(Son_counter >= Son_string.length){return (Moter_counter-Son_string.length); //无需回退,所以找到就通过坐标运算即可}else{return -1;}
}
函数库
头文件
Sequential_string.h
#ifndef _SEQUENTIAL_STRING_H_INCLUDE
#define _SEQUENTIAL_STRING_H_INCLUDE#include <stdio.h>
#define MaxSize 100 //最多字符个数//顺序串数据结构
typedef struct
{char Sequential_string_data[MaxSize];//数组串数据int length; //实际串长
}Sequential_string;//(1)将一个字符串数组赋值给顺序串
void Assignment_Sequential_string(Sequential_string &New_String, char Assign_array[]);
//(2) 复制一个串,到另一个串
void Copy_Sequential_String(Sequential_string &accept_string, Sequential_string copy_string);
//(3)判断两个串是否相等
bool Equal_Sequential_String(Sequential_string judge_string1, Sequential_string judge_string2);
//(4)求顺序串串长
int Length_Sequential_String(Sequential_string measure_string);
//(5)串连接
Sequential_string Connect_Sequential_String(Sequential_string link1, Sequential_string link2);
//(6)求子串(从begin_loation开始的number个字符)
Sequential_string Get_Sequential_Substring(Sequential_string substring, int begin_loation, int number);
//(7)插入串(从从begin_loation开始插入字符串,然后组合成新的串)
Sequential_string Insert_Sequential_String(Sequential_string old_string, int begin_loation,Sequential_string insert_string);
//(8)删除串(从begin 开始的number个字符)
Sequential_string Delete_Sequential_String(Sequential_string old_string, int begin_loation,int number);
//(9)串替换(从begin 开始的number个字符)
Sequential_string Replace_Sequential_String(Sequential_string old_string, int begin_loation,int number,Sequential_string new_string);
//(10)输出展示串
void Display_Sequential_String(Sequential_string show_String);
#endif
库函数
Sequential_string.cpp
#include "Sequential_string.h"/**************************************************
(1)函数名: Assignment_Sequential_string
功 能: 将一个字符串数组赋值给顺序串
参 数: (1)Sequential_string &New_String:创建的新串(2)char Assign_array[]: 原始字符串数组
注 意: 顺序数组,结尾加入'\0'
返回值: 无
**************************************************/
void Assignment_Sequential_string(Sequential_string &New_String, char Assign_array[])
{int counter;for(counter = 0; Assign_array[counter] != '\0'; counter++){New_String.Sequential_string_data[counter] = Assign_array[counter];}New_String.Sequential_string_data[counter] = '\0';New_String.length = counter; //更新串最大位序
}/**************************************************
(2)函数名: Copy_Sequential_String
功 能: 复制一个串,到另一个串
参 数: (1)Sequential_string &accept_string: 复制成的串(2)Sequential_string copy_string:要复制的串
注 意: 复制成的串,传回的是地址,所以不用传回参数
返回值: 无
**************************************************/
void Copy_Sequential_String(Sequential_string &accept_string, Sequential_string copy_string)
{int counter;for(counter = 0; counter < copy_string.length;counter++){accept_string.Sequential_string_data[counter] = copy_string.Sequential_string_data[counter];}accept_string.Sequential_string_data[counter] = '\0';accept_string.length = copy_string.length;
}
/**************************************************
(3)函数名: Equal_Sequential_String
功 能: 判断两个串是否相等
参 数: (1)Sequential_string judge_string1:第一个串(2)Sequential_string judge_string2:第二个串
返回值: bool?是否相等,true:false
**************************************************/
bool Equal_Sequential_String(Sequential_string judge_string1, Sequential_string judge_string2)
{bool same = true;int counter;if(judge_string1.length != judge_string2.length){same = false;}else{for(counter = 0; counter < judge_string1.length;counter++){if(judge_string1.Sequential_string_data[counter] != judge_string2.Sequential_string_data[counter]){same = false;break;}}}return same;}/**************************************************
(4)函数名: Length_Sequential_String
功 能: 求顺序串串长
参 数: Sequential_string measure_string:要进行测量的串
返回值: int:顺序串长度信息
**************************************************/
int Length_Sequential_String(Sequential_string measure_string)
{return measure_string.length;
}/**************************************************
(5)函数名: Connect_Sequential_String
功 能: 把两个串连接成一个串
参 数: Sequential_string link1, Sequential_string link2:两个要链接的串
返回值: 返回Sequential_string Connection_string: 链接成的串
**************************************************/
Sequential_string Connect_Sequential_String(Sequential_string link1, Sequential_string link2)
{Sequential_string Connection_string;int counter;Connection_string.length = link1.length + link2.length;//将第一个串加入链接的串for(counter = 0; counter < link1.length; counter++){Connection_string.Sequential_string_data[counter] = link1.Sequential_string_data[counter];}//将第二个串加入链接的串for(counter = 0; counter < link2.length; counter++){Connection_string.Sequential_string_data[link1.length+counter] = link2.Sequential_string_data[counter];}Connection_string.Sequential_string_data[link1.length+counter] = '\0';return Connection_string;
}/**************************************************
(6)函数名: Get_Sequential_Substring
功 能: 求子串(从begin_loation开始的number个字符)
参 数: (1)Sequential_string mother_String:母串(2)int begin_loation:开始分割子串的位置(3)int number:子串的数量
返回值: Sequential_string son_String:得到的子串
**************************************************/
Sequential_string Get_Sequential_Substring(Sequential_string mother_String, int begin_loation, int number)
{Sequential_string son_String;int counter;son_String.length = 0;if(begin_loation <= 0 || begin_loation > mother_String.length || number < 0 || begin_loation+number-1>mother_String.length){//错误:分割的子字符串的位置错误。printf("\nError<6>:The position of the divided substring is wrong.\n");return son_String; // 参数不正确返回空串}for(counter = begin_loation-1; counter < begin_loation+number-1; counter++){son_String.Sequential_string_data[counter-begin_loation+1] = mother_String.Sequential_string_data[counter];}son_String.Sequential_string_data[counter-begin_loation+1] = '\0';son_String.length = number;return son_String;
}/**************************************************
(7)函数名: Insert_Sequential_String
功 能: 插入串(从从begin_loation开始插入字符串,然后组合成新的串)
参 数: (1)Sequential_string old_string:在原始串的基础上插入(2)int begin_loation: 插入的位置(3)Sequential_string insert_string:插入的新串
思 路: 在原有串的基础上,割开一个口子,放上新串,然后组合成新串
返回值: Sequential_string form_string:组合成的新串
**************************************************/
Sequential_string Insert_Sequential_String(Sequential_string old_string, int begin_loation,Sequential_string insert_string)
{int counter;Sequential_string form_string;form_string.length = 0;//参数不正确, 返回空串if(begin_loation <= 0 || begin_loation > old_string.length+1){//错误:插入位置错误printf("\nError<7>: wrong insertion position.\n");return form_string;}for(counter = 0; counter < begin_loation-1;counter++){form_string.Sequential_string_data[counter] = old_string.Sequential_string_data[counter];}for(counter = 0; counter < insert_string.length;counter++){form_string.Sequential_string_data[begin_loation-1+counter] = insert_string.Sequential_string_data[counter];}for(counter = begin_loation-1; counter<old_string.length;counter++){form_string.Sequential_string_data[insert_string.length+counter] = old_string.Sequential_string_data[counter];}form_string.Sequential_string_data[insert_string.length+counter] = '\0';form_string.length = old_string.length + insert_string.length;return form_string;}
/**************************************************
(8)函数名: Delete_Sequential_String
功 能: 删除串(从begin 开始的number个字符)
参 数: (1)Sequential_string old_string:在原有串的基础上删除(2)int begin_loation: 开始删除的位置(从逻辑1开始)(3)int number:删除的数量
注 意: 要判断删除的位置和数量是否正确
返回值:Sequential_string new_string:删除完后的新串
**************************************************/
Sequential_string Delete_Sequential_String(Sequential_string old_string, int begin_loation,int number)
{int counter;//定义计数器Sequential_string new_string;new_string.length = 0;//合法性判断(begin_loation理应从1开始到leng长度)if(begin_loation <= 0 || begin_loation > old_string.length || (begin_loation+number-1) > old_string.length){//错误:删除的位置或数量错误。printf("Error<8>: Wrong location or quantity of deletion.");return new_string;//返回空串}//择出删除位置之前的串for(counter = 0; counter < begin_loation-1;counter++){new_string.Sequential_string_data[counter] = old_string.Sequential_string_data[counter];}//择出删除位置之后的串for(counter = begin_loation+number-1; counter < old_string.length; counter++){new_string.Sequential_string_data[counter-number] = old_string.Sequential_string_data[counter];}new_string.Sequential_string_data[counter-number] = '\0';new_string.length = old_string.length - number;return new_string;
}/**************************************************
(9)函数名: Replace_Sequential_String
功 能: 串替换(从begin 开始的number个字符)
参 数: (1)Sequential_string old_string:原始串(2)int begin_loation:开始替换的位置(3)int number:替换的字符个数(4)Sequential_string replace_string:要替换成的字符串
思 路: 锁定old_string从begin_loation开始的number个字符,然后开始剪切建立新串,①把begin_loation之前的字符加入新串,②要替换成的串加入,③锁定后的字符加入④组合成新串,返回传出
注 意: 最后加'\0'
返回值: Sequential_string new_string:替换后,产生的新串
**************************************************/
Sequential_string Replace_Sequential_String(Sequential_string old_string, int begin_loation,int number,Sequential_string replace_string)
{int counter;Sequential_string new_string;new_string.length = 0;//合法性判断if(begin_loation <= 0 || begin_loation > old_string.length || begin_loation+number-1>old_string.length){//错误:要替换位置出现错误printf("\nError<9>: There is an error in the position to be replaced.\n");return new_string;//返回空串}//开始复制剪切for(counter = 0; counter < begin_loation-1; counter++){new_string.Sequential_string_data[counter] = old_string.Sequential_string_data[counter];}//加入要替换的串for(counter = 0; counter < replace_string.length; counter++){new_string.Sequential_string_data[begin_loation-1+counter] = replace_string.Sequential_string_data[counter];}//被替换位置,后面剩余的串for(counter = begin_loation+number-1; counter < old_string.length; counter++){new_string.Sequential_string_data[counter-number+replace_string.length] = old_string.Sequential_string_data[counter];}new_string.Sequential_string_data[counter-number+replace_string.length] = '\0';new_string.length = old_string.length - number + replace_string.length;return new_string;
}/**************************************************
(10)函数名: Display_Sequential_String
功 能: 输出展示串
参 数: Sequential_string show_String:要输出展示的串
注 意: 字符串后续可以换成自定义类型
返回值: 无
**************************************************/
void Display_Sequential_String(Sequential_string show_String)
{int counter;if(show_String.length > 0){for(counter = 0; counter < show_String.length; counter++){printf("%c", show_String.Sequential_string_data[counter]);}printf("\n");}
}
main函数调用
int main()
{Sequential_string Mother_string;Sequential_string Son_string;//子母串数组char Mother_array[] = {'a','b','a','b','c','a','b','c','a','c','b','a','b','\0'};char Son_array[] = {'c','a','b','c','a','\0'};//用顺序串来承接创建字母子串(在母串中查找子串)Assignment_Sequential_string(Mother_string,Mother_array);Assignment_Sequential_string(Son_string,Son_array);//输出展示子母串printf("\nMother_string:\n");Display_Sequential_String(Mother_string);printf("\nSon_string:\n");Display_Sequential_String(Son_string);//调用kmp算法,输出其位置(-1未找到,其他找到)printf("\n子串在母串中的位序为:%d\n",Find_matching_location_KMP(Mother_string,Son_string));return 0;}
main.cpp(包含KMP算法功能函数)
main.cpp
#include <stdio.h>
#include "Sequential_string.h"
#define index_MaxSize 100/**************************************************
函数名: Get_index
功 能: 得到Kmp算法的模式串的数组索引
参 数: (1)Sequential_string model_String:要进行求数组索引的模式串(2)int index_array[]: 存储数组索引
注 意: 本质就是求最长公共前后缀
返回值: 无
**************************************************/
void Get_index(Sequential_string model_String, int index_array[])
{int counter = 0; //计数器int index_counter = -1; //索引计数器index_array[0] = -1; //特殊初始索引//计数器控制长度while(counter < model_String.length-1){//递归模拟对比,直到开头,加入索引,或者在前一个串的基础上,进行判断相同后,累加if(index_counter == -1 || model_String.Sequential_string_data[counter] == model_String.Sequential_string_data[index_counter]){counter++;index_counter++;index_array[counter] = index_counter;}else{index_counter = index_array[index_counter];}}
}/**************************************************
函数名: Find_matching_location_KMP
功 能: 利用数组索引, 实现KMP算法, 在母串中,找到子串在母串中的位置
参 数: (1)Sequential_string Mother_string:母串(2)Sequential_string Son_string:子串
思 路: 利用KMp算法, 母串计数器不回退, 子串利用数组索引,进行追忆跳跃检索对比
返回值: int: 子串在母串数组中的开始位置(-1不存在,其他为位序)
**************************************************/
int Find_matching_location_KMP(Sequential_string Mother_string, Sequential_string Son_string)
{int next_index[index_MaxSize];//索引数组int Moter_counter = 0; //母串计数器int Son_counter = 0; //子串计数器//调用索引函数Get_index(Son_string,next_index);//当母串遍历完或者子串遍历完,跳出(通过子串是否遍历完,判断是否找到)while(Moter_counter < Mother_string.length && Son_counter < Son_string.length){//当索引为-1,则代表, 要从子串开头进行对比,母串则不回退累加//当是子串和母串字符相等,则也进行累加判断if(Son_counter== -1 || Mother_string.Sequential_string_data[Moter_counter] == Son_string.Sequential_string_data[Son_counter]){Moter_counter++;Son_counter++;}else{Son_counter = next_index[Son_counter];}}//通过判断子串是否遍历完,if(Son_counter >= Son_string.length){return (Moter_counter-Son_string.length); //无需回退,所以找到就通过坐标运算即可}else{return -1;}
}int main()
{Sequential_string Mother_string;Sequential_string Son_string;//子母串数组char Mother_array[] = {'a','b','a','b','c','a','b','c','a','c','b','a','b','\0'};char Son_array[] = {'c','a','b','c','a','\0'};//用顺序串来承接创建字母子串(在母串中查找子串)Assignment_Sequential_string(Mother_string,Mother_array);Assignment_Sequential_string(Son_string,Son_array);//输出展示子母串printf("\nMother_string:\n");Display_Sequential_String(Mother_string);printf("\nSon_string:\n");Display_Sequential_String(Son_string);//调用kmp算法,输出其位置(-1未找到,其他找到)printf("\n子串在母串中的位序为:%d\n",Find_matching_location_KMP(Mother_string,Son_string));return 0;}