文章目录
- 实验目的
- 实现流程
- 代码
- 运行结果
- 测试1(含公共因子)
- 测试2(经典的i+i*i文法,且含左递归)
- 测试3(识别部分标识符)
- 总结
实验目的
实现自上而下分析的LL1语法分析器,给出分析过程
实现流程
代码
代码逻辑
1.预处理
- 去除多余空格:如“ S - > aB”,处理成“S->aB”
- 拆解候选式:对于某一产生式,若有多个候选式,根据 | 符号拆解为多个产生式。
- 获取开始符号:默认输入的第一个非终结符为开始符
- 消除左递归和回溯(公共因子)
- 获取非终结符和终结符
2.计算first集和follow集
3.检查是否符合LL1文法
4.建立预测分析表
5.对输入串进行LL1分析
import copy
from collections import defaultdict
import pandas as pd


class LL1:
    """LL(1) predictive parser built from textual productions.

    Grammar conventions used throughout the class:

    * every symbol is a single character;
    * upper-case letters are non-terminals, everything else is a terminal;
    * ``'@'`` denotes the empty string (epsilon);
    * ``'#'`` is the end-of-input marker;
    * alternatives of one production are separated by ``'|'``.

    Typical use: ``parser = LL1(productions); parser.init(); parser.solve(s)``.
    """

    def __init__(self, input_str_list):
        """Store the raw productions and create empty analysis state.

        Args:
            input_str_list: list of strings such as ``"E->E+T|T"``.
        """
        self.input_str_list = input_str_list
        self.formulas_dict = {}  # non-terminal -> list of candidate strings
        self.S = ""  # start symbol
        self.Vt = []  # terminals
        self.Vn = []  # non-terminals
        self.first = defaultdict(set)  # FIRST sets, keyed by symbol
        self.follow = defaultdict(set)  # FOLLOW sets, keyed by non-terminal
        self.table = {}  # (non-terminal, terminal) -> production body
        self.info = {}  # trace of the most recent analysis

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _fresh_non_terminal(grammar):
        """Return an unused letter A-Z to name a new non-terminal.

        A letter counts as used when it is a key of ``grammar`` or occurs
        in any production body.

        Raises:
            ValueError: if all 26 letters are already in use.
        """
        used = set(grammar)
        for productions in grammar.values():
            for production in productions:
                used.update(ch for ch in production if ch.isupper())
        for code in range(ord('A'), ord('Z') + 1):
            if chr(code) not in used:
                return chr(code)
        raise ValueError("no unused non-terminal name left in A-Z")

    @staticmethod
    def _concat(head, tail):
        """Concatenate two production fragments, dropping epsilon marks.

        ``'@'`` is only meaningful as a whole production, so it is removed
        from the joined text; a completely empty result becomes ``'@'``.
        """
        joined = (head + tail).replace('@', '')
        return joined or '@'

    @staticmethod
    def _sequence_first(symbols, first):
        """Compute FIRST of a symbol sequence.

        Args:
            symbols: string of grammar symbols (may be empty).
            first: mapping from non-terminal to its FIRST set.

        Returns:
            A set of terminals; it contains ``'@'`` when every symbol of
            the sequence can derive the empty string (or it is empty).
        """
        result = set()
        for symbol in symbols:
            if symbol == '@':
                continue
            if not symbol.isupper():
                result.add(symbol)
                return result
            symbol_first = first.get(symbol, set())
            result |= symbol_first - {'@'}
            if '@' not in symbol_first:
                return result
        result.add('@')
        return result

    @staticmethod
    def _common_prefixes(candidates):
        """Find shared textual prefixes among candidates.

        Pairs are scanned in order; at most one prefix is kept per leading
        character (the first one discovered).
        """
        prefixes = []
        for i, str1 in enumerate(candidates):
            for str2 in candidates[i + 1:]:
                index = 0
                limit = min(len(str1), len(str2))
                while index < limit and str1[index] == str2[index]:
                    index += 1
                if index == 0:
                    continue
                if any(pre[0] == str1[0] for pre in prefixes):
                    continue
                prefixes.append(str1[:index])
        return prefixes

    # ------------------------------------------------------------------
    # Grammar transformations
    # ------------------------------------------------------------------
    def eliminate_direct_left_recursion(self, grammar, non_terminal):
        """Remove direct left recursion of ``non_terminal`` in place.

        ``A -> A a | b`` becomes ``A -> b A'`` and ``A' -> a A' | @``
        where ``A'`` is a freshly chosen letter.

        Args:
            grammar: dict mapping non-terminal to list of candidates.
            non_terminal: the non-terminal to process.

        Returns:
            The same ``grammar`` dict (mutated).

        Raises:
            ValueError: if no unused letter is available for ``A'``.
        """
        productions = grammar[non_terminal]
        recursive = [p for p in productions if p.startswith(non_terminal)]
        if not recursive:
            return grammar

        others = [p for p in productions if not p.startswith(non_terminal)]
        new_non_terminal = self._fresh_non_terminal(grammar)

        # A -> (b1 | b2 ...) A'   (an epsilon alternative becomes just A')
        grammar[non_terminal] = [self._concat(p, new_non_terminal) for p in others]

        # A' -> (a1 | a2 ...) A' | @   (useless A -> A alternatives are dropped)
        tails = []
        for production in recursive:
            tail = production[len(non_terminal):].replace('@', '')
            if tail and tail + new_non_terminal not in tails:
                tails.append(tail + new_non_terminal)
        tails.append('@')
        grammar[new_non_terminal] = tails
        return grammar

    def is_recruse(self, grammar, non_terminals, iidx, cur, pre):
        """Heuristically decide whether substituting ``pre`` is needed.

        Collects the leading non-terminals of ``pre``'s candidates and the
        non-terminals from position ``iidx`` onward that have a candidate
        starting with ``cur`` (the last non-terminal is always included).
        Substitution is considered necessary when the two sets intersect.

        Returns:
            True if the sets intersect, else False.
        """
        front = {p[0] for p in grammar[pre] if p[:1].isupper()}

        back = set()
        last = len(non_terminals) - 1
        for i in range(iidx, len(non_terminals)):
            cur_back = non_terminals[i]
            if i == last:
                back.add(cur_back)
            if any(p.startswith(cur) for p in grammar[cur_back]):
                back.add(cur_back)
        return bool(front & back)

    def eliminate_left_recursion(self, grammar):
        """Eliminate indirect, then direct, left recursion.

        Non-terminals are processed in reverse definition order so the
        start symbol comes last.

        Args:
            grammar: dict mapping non-terminal to list of candidates.

        Returns:
            The transformed grammar dict.
        """
        non_terminals = list(grammar.keys())[::-1]
        replaced_vn = []  # non-terminals that were substituted away
        for i, cur in enumerate(non_terminals):
            for pre in non_terminals[:i]:
                new_productions = []
                for cur_production in grammar[cur]:
                    if cur_production.startswith(pre) and self.is_recruse(
                            grammar, non_terminals, i, cur, pre):
                        rest_str = cur_production[len(pre):]
                        if pre not in replaced_vn:
                            replaced_vn.append(pre)
                        expanded = [self._concat(p, rest_str) for p in grammar[pre]]
                    else:
                        expanded = [cur_production]
                    for production in expanded:
                        if production not in new_productions:
                            new_productions.append(production)
                grammar[cur] = new_productions
            grammar = self.eliminate_direct_left_recursion(grammar, cur)

        # Drop substituted non-terminals only when nothing refers to them
        # any more; deleting a referenced one would break the grammar.
        for vn in replaced_vn:
            still_used = any(
                vn in production
                for left, productions in grammar.items() if left != vn
                for production in productions
            )
            if not still_used:
                grammar.pop(vn, None)
        return grammar

    def eliminate_huisu(self, grammar):
        """Left-factor the grammar to remove backtracking.

        Candidates sharing a textual prefix ``p`` are replaced by ``p X``
        with a fresh non-terminal ``X`` deriving the remaining tails.
        The process repeats until no common prefix is left.  Candidate
        order is preserved so the result is deterministic.

        Returns:
            The same ``grammar`` dict (mutated).

        Raises:
            ValueError: if no unused letter is available for ``X``.
        """
        while True:
            changed = False
            # Snapshot: new non-terminals are handled in the next pass.
            for left, candidates in list(grammar.items()):
                candidates = list(candidates)
                prefixes = self._common_prefixes(candidates)
                if not prefixes:
                    continue
                changed = True

                new_right = []
                group_vn = {}  # prefix -> non-terminal holding its tails
                for candidate in candidates:
                    prefix = next((p for p in prefixes if candidate.startswith(p)), None)
                    if prefix is None:
                        if candidate not in new_right:
                            new_right.append(candidate)
                        continue
                    if prefix not in group_vn:
                        new_vn = self._fresh_non_terminal(grammar)
                        group_vn[prefix] = new_vn
                        grammar[new_vn] = []  # reserve the name immediately
                        new_right.append(prefix + new_vn)
                    tail = candidate[len(prefix):] or '@'
                    tails = grammar[group_vn[prefix]]
                    if tail not in tails:
                        tails.append(tail)
                grammar[left] = new_right
            if not changed:
                break
        return grammar

    # ------------------------------------------------------------------
    # Step 1: preprocessing
    # ------------------------------------------------------------------
    def step1_pre_process(self, grammar_list):
        """Parse productions, normalise the grammar and collect symbols.

        Whitespace is removed, alternatives are split on ``'|'``, repeated
        left-hand sides are merged, then left recursion and common
        prefixes are eliminated.

        Args:
            grammar_list: list of strings such as ``"E->E+T|T"``.

        Returns:
            Tuple ``(formulas_dict, Vn, Vt, S)``.

        Raises:
            ValueError: if the list is empty or a production lacks ``->``.
        """
        formulas_dict = {}
        Vt = []
        Vn = []
        for production in grammar_list:
            production = "".join(production.split())  # strip all whitespace
            if not production:
                continue
            left, sep, right = production.partition('->')
            if not sep or not left:
                raise ValueError(f"invalid production: {production!r}")
            candidates = formulas_dict.setdefault(left, [])
            for r in right.split("|"):
                r = r or '@'  # an empty alternative means epsilon
                if r not in candidates:
                    candidates.append(r)
        if not formulas_dict:
            raise ValueError("grammar is empty")

        # The first non-terminal given is the start symbol.
        S = next(iter(formulas_dict))

        formulas_dict = self.eliminate_left_recursion(formulas_dict)
        formulas_dict = self.eliminate_huisu(formulas_dict)
        print("=========消除左递归和回溯后的产生式=========")
        for left, right in formulas_dict.items():
            print(left + "->" + "|".join(right))
        print("=========基本信息=========")

        for left, right in formulas_dict.items():
            if left not in Vn:
                Vn.append(left)
            for r_candidate in right:
                for symbol in r_candidate:
                    if not symbol.isupper() and symbol != '@' and symbol not in Vt:
                        Vt.append(symbol)

        print("开始符:", S)
        print("非终结符:", Vn)
        print("终结符:", Vt)
        return formulas_dict, Vn, Vt, S

    # ------------------------------------------------------------------
    # Step 2: FIRST sets
    # ------------------------------------------------------------------
    def cal_symbol_first(self, symbol):
        """Compute FIRST(symbol) into ``self.first`` by recursion.

        Reads ``self.formulas_dict``.  NOTE: the recursion assumes the
        grammar has no (direct or indirect) left recursion.
        """
        if not symbol.isupper():
            # Terminals (and '@') are their own FIRST set.
            self.first[symbol].add(symbol)
            return
        for r_candidate in self.formulas_dict[symbol]:
            all_nullable = True
            for next_symbol in r_candidate:
                if not next_symbol.isupper():
                    self.first[symbol].add(next_symbol)
                    all_nullable = False
                    break
                self.cal_symbol_first(next_symbol)
                self.first[symbol] = self.first[symbol].union(
                    self.first[next_symbol] - {'@'})
                if '@' not in self.first[next_symbol]:
                    all_nullable = False
                    break
            if all_nullable:
                # Every symbol of the candidate can vanish.
                self.first[symbol].add('@')

    def step2_cal_first(self, formulas_dict):
        """Compute and print FIRST for all symbols of the grammar."""
        for vn in formulas_dict.keys():
            self.cal_symbol_first(vn)
        for vt in self.Vt:
            self.cal_symbol_first(vt)
        self.cal_symbol_first('@')
        for key, value in self.first.items():
            print(f"First({key}): {value}")

    # ------------------------------------------------------------------
    # Step 3: FOLLOW sets
    # ------------------------------------------------------------------
    def cal_follow1(self, vn):
        """Initialise FOLLOW(vn) from FIRST of what follows ``vn``.

        Adds ``'#'`` for the start symbol and, for every occurrence of
        ``vn`` in a candidate, FIRST(rest) without epsilon.  Propagation
        of FOLLOW(left) is done separately by ``cal_follow2``.
        """
        collected = set()
        if vn == self.S:
            collected.add('#')
        for right in self.formulas_dict.values():
            for r_candidate in right:
                for pos, symbol in enumerate(r_candidate):
                    if symbol == vn:
                        rest_first = self._sequence_first(r_candidate[pos + 1:], self.first)
                        collected |= rest_first - {'@'}
        self.follow[vn] = collected

    def cal_follow2(self, vn):
        """Propagate FOLLOW(left) into FOLLOW(vn).

        For every ``left -> ... vn rest`` where ``rest`` is empty or can
        derive the empty string, FOLLOW(left) is merged into FOLLOW(vn).
        """
        for left, right in self.formulas_dict.items():
            for r_candidate in right:
                for pos, symbol in enumerate(r_candidate):
                    if symbol != vn:
                        continue
                    if '@' in self._sequence_first(r_candidate[pos + 1:], self.first):
                        self.follow[vn] = self.follow[vn].union(self.follow[left])

    def cal_follow_total_Len(self):
        """Return the total number of elements over all FOLLOW sets."""
        return sum(len(vn_follow) for vn_follow in self.follow.values())

    def step3_cal_follow(self, formulas_dict):
        """Compute and print FOLLOW for every non-terminal.

        ``cal_follow1`` seeds the sets; ``cal_follow2`` is then repeated
        until the total size stops growing (fixed point).
        """
        for vn in formulas_dict.keys():
            self.cal_follow1(vn)
        while True:
            old_len = self.cal_follow_total_Len()
            for vn in formulas_dict.keys():
                self.cal_follow2(vn)
            if old_len == self.cal_follow_total_Len():
                break
        for key, value in self.follow.items():
            print(f"Follow({key}): {value}")

    # ------------------------------------------------------------------
    # Step 4: LL(1) check
    # ------------------------------------------------------------------
    def step4_check_LL1(self, formulas_dict, first, follow):
        """Check the LL(1) conditions.

        For each non-terminal the FIRST sets of its candidates must be
        pairwise disjoint, and if some candidate is nullable the
        remaining FIRST symbols must not intersect FOLLOW(non-terminal).

        Returns:
            True if the grammar is LL(1), else False.
        """
        for left, right in formulas_dict.items():
            seen = set()
            for r_candidate in right:
                candidate_first = self._sequence_first(r_candidate, first)
                if seen & candidate_first:  # any overlap is a conflict
                    return False
                seen |= candidate_first
            if '@' in seen and (seen - {'@'}) & follow.get(left, set()):
                return False
        return True

    # ------------------------------------------------------------------
    # Step 5: predictive table
    # ------------------------------------------------------------------
    def step5_create_table(self, formulas_dict, first, follow):
        """Build and print the LL(1) predictive parsing table.

        Returns:
            Tuple ``(tab_dict, tab_df)`` where ``tab_dict`` maps
            ``(non_terminal, terminal)`` to a production body and
            ``tab_df`` is the same data pivoted into a DataFrame.
        """
        tab_dict = {}
        for left, right in formulas_dict.items():
            for r_candidate in right:
                candidate_first = self._sequence_first(r_candidate, first)
                for fi in candidate_first:
                    if fi == '@':
                        # Nullable candidate: selected on FOLLOW(left).
                        for fo in follow.get(left, set()):
                            tab_dict[(left, fo)] = r_candidate
                    else:
                        tab_dict[(left, fi)] = r_candidate
        df = pd.DataFrame(list(tab_dict.items()), columns=['Key', 'Value'])
        df['Vn'] = [x[0] for x in df['Key']]
        df['Vt'] = [x[1] for x in df['Key']]
        tab_df = df.pivot(index='Vn', columns='Vt', values='Value')
        print(tab_df)
        return tab_dict, tab_df

    # ------------------------------------------------------------------
    # Step 6: table-driven analysis
    # ------------------------------------------------------------------
    def step6_LL1_analyse(self, s, S, Vn, Vt, table):
        """Run the table-driven LL(1) analysis on ``s``.

        Args:
            s: input string (without the end marker).
            S: start symbol.
            Vn: non-terminals.
            Vt: terminals.
            table: mapping ``(non_terminal, terminal) -> production body``.

        Returns:
            Dict with parallel lists ``info_step``, ``info_stack``,
            ``info_str``, ``info_msg`` and the verdict ``info_res``.
        """
        s = list(s)
        s.append('#')
        sp = 0  # input pointer
        stack = ['#', S]
        msg = ""  # action taken in the previous step
        step = 0
        info_step, info_stack, info_str, info_msg, info_res = [], [], [], [], ""
        while sp != len(s):
            ch = s[sp]
            top = stack[-1]
            step += 1
            info_step.append(step)
            info_stack.append(''.join(stack))
            info_str.append(''.join(s[sp:]))
            info_msg.append(msg)
            if top in Vt:
                if top != ch:
                    info_res = f"error: 栈顶元素{top} 与 字符{ch} 不匹配!"
                    break
                stack.pop()
                sp += 1
                msg = f"'{ch}'匹配"
            elif top in Vn:
                if (top, ch) not in table:
                    info_res = f"error: table找不到匹配的({top},{ch})"
                    break
                stack.pop()
                stack.extend(reversed(table[(top, ch)]))  # push body reversed
                msg = f"{top}->" + table[(top, ch)]
            elif top == '#':
                if ch == '#':
                    info_res = f"Success!"
                else:
                    info_res = f"error: 栈顶元素{top} 与 字符{ch} 不匹配!"
                break
            elif top == '@':
                stack.pop()  # epsilon consumes no input
                msg = f"'@'出栈"
            else:
                # Unknown stack symbol: stop instead of looping forever.
                info_res = f"error: 无法识别的栈顶符号{top}"
                break
        info = {
            "info_step": info_step,
            "info_stack": info_stack,
            "info_str": info_str,
            "info_msg": info_msg,
            "info_res": info_res,
        }
        return info

    # ------------------------------------------------------------------
    # Public drivers
    # ------------------------------------------------------------------
    def init(self):
        """Preprocess the grammar and build FIRST, FOLLOW and the table.

        The table is only built when the grammar passes the LL(1) check.
        """
        # Start from clean state so init() can be called more than once.
        self.first = defaultdict(set)
        self.follow = defaultdict(set)
        self.table = {}

        self.formulas_dict, self.Vn, self.Vt, self.S = self.step1_pre_process(self.input_str_list)
        print("=========First、Follow=========")
        self.step2_cal_first(self.formulas_dict)
        self.step3_cal_follow(self.formulas_dict)
        check_res = self.step4_check_LL1(self.formulas_dict, self.first, self.follow)
        if check_res:
            print("\n经过分析,该文法 符合 LL(1)文法\n")
        else:
            print("\n经过分析,该文法 不符合 LL(1)文法\n")
            return
        print("=========预测分析表=========")
        self.table, df_tab = self.step5_create_table(self.formulas_dict, self.first, self.follow)

    def solve(self, s):
        """Analyse ``s``, print the step trace and verdict, return the trace."""
        self.info = self.step6_LL1_analyse(s, self.S, self.Vn, self.Vt, self.table)
        print("=========分析过程=========")
        rows = zip(self.info["info_step"], self.info["info_stack"],
                   self.info["info_str"], self.info["info_msg"])
        for step, stack, rest, msg in rows:
            print("{:<15} {:<15} {:<15} {:<15}".format(str(step), stack, rest, msg))
        print(self.info["info_res"])
        return self.info


if __name__ == "__main__":
    grammar1 = [  # accepts abb, abcbcbcbb, ...
        "E->abA|aB|abB|cd|cf",
        "A->cbA|b",
        "B->e",
    ]
    grammar2 = [  # accepts i+i*i, (i+i)*i, ...
        "E->E+T|T",
        "T->T*F|F",
        "F->(E)|i",
    ]
    grammar3 = [  # partial identifier grammar: aa, a1, aaa, aa1
        "E->LL|LD|LLL|LLD",
        "L->a|b|c",
        "D->0|1|2|3|4|5|6|7|8|9",
    ]
    grammar4 = [  # accepts aad, bd, cbd, aacbd, ...
        "S->AaS|BbS|d",
        "A->a",
        "B->@|c",
    ]
    ll1 = LL1(grammar3)
    ll1.init()
    analyse_str = "ab1"
    ll1.solve(analyse_str)
运行结果
测试1(含公共因子)
输入:
文法:
分析串:
输出:
测试2(经典的i+i*i文法,且含左递归)
输入:
文法:
分析串:
输出:
测试3(识别部分标识符)
输入:
文法:
分析串:
输出:
总结
实现过程中,对于消除左递归、消除回溯、first集、follow集的实现查阅了很多资料,修改了很多次代码,目前来说暂时能适用很多文法了。