【例 8.7】实现 FP 树算法,并对模拟数据集 simpDat 挖掘频繁项集,最小支持度为 2,绘制 FP 树并输出频繁项集。
运行结果:
声明:著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 31 10:14:48 2025

@author: 破无差
"""
class treeNode:
    """A single node of an FP-tree."""

    def __init__(self, nameValue, numOccur, parentNode):
        """Create a node labelled ``nameValue`` with count ``numOccur``.

        Args:
            nameValue: Item label stored at this node.
            numOccur: Initial occurrence count for the node.
            parentNode: Parent ``treeNode``; ``None`` for the root.
        """
        self.name = nameValue
        self.count = numOccur
        # Next node in the tree that carries the same item; the header table
        # points at the first node of this chain.
        self.nodeLink = None
        self.parent = parentNode
        # Maps item label -> child treeNode.
        self.children = {}

    def inc(self, numOccur):
        """Add ``numOccur`` to this node's count."""
        self.count += numOccur

    def disp(self, ind=1):
        """Print the subtree rooted here, one node per line.

        Args:
            ind: Number of leading spaces for this node; each level of
                children is printed with one more.
        """
        print(' ' * ind, self.name, ' ', self.count)
        for child in self.children.values():
            child.disp(ind + 1)


def createTree(dataSet, minSup=1):
    """Build an FP-tree and its header table from weighted transactions.

    Args:
        dataSet: Mapping of transaction (an iterable of items, e.g. a
            ``frozenset``) to the number of times that transaction occurs.
        minSup: Minimum total count an item needs to be kept.

    Returns:
        ``(root, headerTable)`` where ``headerTable`` maps each frequent item
        to ``[total_count, first_node]``. Returns ``(None, None)`` when no
        item reaches ``minSup``.
    """
    # First pass: total weighted count of every item.
    support = {}
    for trans, occurrences in dataSet.items():
        for item in trans:
            support[item] = support.get(item, 0) + occurrences

    # Keep only frequent items; the second slot will hold the head of the
    # node-link chain once the item is inserted into the tree.
    headerTable = {
        item: [total, None]
        for item, total in support.items()
        if total >= minSup
    }
    if not headerTable:
        return None, None

    retTree = treeNode('Null Set', 1, None)
    # Second pass: insert each transaction, restricted to frequent items and
    # ordered by descending (count, item) so shared prefixes overlap.
    for tranSet, count in dataSet.items():
        localD = {
            item: headerTable[item][0]
            for item in tranSet
            if item in headerTable
        }
        if localD:
            orderedItems = [
                item
                for item, _ in sorted(
                    localD.items(), key=lambda p: (p[1], p[0]), reverse=True
                )
            ]
            updateTree(orderedItems, retTree, headerTable, count)
    return retTree, headerTable


def updateTree(items, infree, headerTable, count):
    """Insert one ordered transaction into the tree below ``infree``.

    Args:
        items: Items of the transaction, already in insertion order. An
            empty sequence is a no-op.
        infree: Node under which the path is inserted (normally the root).
        headerTable: Header table as produced by ``createTree``; updated in
            place when new nodes are created.
        count: Number of occurrences this transaction contributes.
    """
    # Iterative walk instead of recursing on items[1:], so long transactions
    # neither copy the list repeatedly nor deepen the call stack.
    node = infree
    for item in items:
        child = node.children.get(item)
        if child is not None:
            # The path already exists: just accumulate the count.
            child.inc(count)
        else:
            child = treeNode(item, count, node)
            node.children[item] = child
            # Register the new node in the item's node-link chain.
            if headerTable[item][1] is None:
                headerTable[item][1] = child
            else:
                updateHeader(headerTable[item][1], child)
        node = child


def updateHeader(nodeToTest, targetNode):
    """Append ``targetNode`` to the end of the node-link chain.

    Args:
        nodeToTest: Any node of the chain (normally its head).
        targetNode: Node to attach after the last node of the chain.
    """
    while nodeToTest.nodeLink is not None:
        nodeToTest = nodeToTest.nodeLink
    nodeToTest.nodeLink = targetNode


def loadSimpDat():
    """Return the sample transaction list used by the example."""
    simpDat = [
        ['11', '12', '15'],
        ['12', '14'],
        ['12', '13'],
        ['11', '12', '14'],
        ['11', '13'],
        ['12', '13'],
        ['11', '13'],
        ['11', '12', '13', '15'],
        ['11', '12', '13'],
    ]
    return simpDat


def createInitSet(dataSet):
    """Convert a list of transactions into a ``frozenset -> count`` dict.

    Identical transactions are merged and their occurrences summed.
    """
    retDict = {}
    for trans in dataSet:
        fset = frozenset(trans)
        retDict[fset] = retDict.get(fset, 0) + 1
    return retDict


def ascendTree(leafNode, prefixPath):
    """Append the names from ``leafNode`` up to (excluding) the root.

    Args:
        leafNode: Node to start from; its own name is appended first.
        prefixPath: List extended in place with the collected names.
    """
    node = leafNode
    # The root is the only node whose parent is None; it is not recorded.
    while node.parent is not None:
        prefixPath.append(node.name)
        node = node.parent


def findPrefixPath(basePat, treeNode):
    """Collect the conditional pattern base along a node-link chain.

    Args:
        basePat: Item being mined (unused; kept for interface compatibility).
        treeNode: First node of the item's node-link chain. The parameter
            name shadows the ``treeNode`` class inside this function only.

    Returns:
        Dict mapping ``frozenset`` of ancestor items to the count of the
        chain node they lead to. Nodes directly under the root contribute
        nothing.
    """
    condPats = {}
    node = treeNode
    while node is not None:
        prefixPath = []
        ascendTree(node, prefixPath)
        # prefixPath[0] is the node itself; the rest are its ancestors.
        if len(prefixPath) > 1:
            condPats[frozenset(prefixPath[1:])] = node.count
        node = node.nodeLink
    return condPats


def mineTree(inTree, headerTable, minSup, prefix, freqItemList):
    """Recursively mine frequent itemsets from an FP-tree.

    Args:
        inTree: Root of the tree being mined (unused; kept for interface
            compatibility).
        headerTable: Header table of that tree.
        minSup: Minimum support count.
        prefix: Set of items already fixed on this recursion branch.
        freqItemList: Output list; every frequent itemset found is appended
            to it as a ``set``.
    """
    # Visit items from least to most frequent. Sort on the numeric count with
    # the item as tie-breaker: sorting on str([count, node]) would compare
    # digits lexicographically and depend on object addresses.
    bigL = [
        item
        for item, _ in sorted(
            headerTable.items(), key=lambda p: (p[1][0], p[0])
        )
    ]
    for basePat in bigL:
        newFreqSet = prefix.copy()
        newFreqSet.add(basePat)
        freqItemList.append(newFreqSet)
        condPattBases = findPrefixPath(basePat, headerTable[basePat][1])
        myContTree, myHead = createTree(condPattBases, minSup)
        # A non-empty conditional tree means larger itemsets may exist.
        if myHead is not None:
            print('conditional tree for: ', newFreqSet)
            myContTree.disp(1)
            mineTree(myContTree, myHead, minSup, newFreqSet, freqItemList)


if __name__ == '__main__':
    simpDat = loadSimpDat()
    initSet = createInitSet(simpDat)
    myFptree, myHeaderTab = createTree(initSet, 2)
    freqItems = []
    mineTree(myFptree, myHeaderTab, 2, set(), freqItems)
    print(freqItems)