说明
如果需要用到这些知识却没有掌握,则会让人感到沮丧,也可能导致面试被拒。无论是花几天时间“突击”,还是利用零碎的时间持续学习,在数据结构上下点功夫都是值得的。那么Python 中有哪些数据结构呢?列表、字典、集合,还有……栈?Python 有栈吗?本系列文章将给出详细拼图。
13章: Binary Tree
The binary Tree: 二叉树,每个节点做多只有两个子节点
class _BinTreeNode:def __init__(self, data):self.data = dataself.left = Noneself.right = None# 三种depth-first遍历 def preorderTrav(subtree):""" 先(根)序遍历"""if subtree is not None:print(subtree.data)preorderTrav(subtree.left)preorderTrav(subtree.right)def inorderTrav(subtree):""" 中(根)序遍历"""if subtree is not None:preorderTrav(subtree.left)print(subtree.data)preorderTrav(subtree.right)def postorderTrav(subtree):""" 后(根)序遍历"""if subtree is not None:preorderTrav(subtree.left)preorderTrav(subtree.right)print(subtree.data)# 宽度优先遍历(bradth-First Traversal): 一层一层遍历, 使用queue def breadthFirstTrav(bintree):from queue import Queue # py3q = Queue()q.put(bintree)while not q.empty():node = q.get()print(node.data)if node.left is not None:q.put(node.left)if node.right is not None:q.put(node.right)class _ExpTreeNode:__slots__ = ('element', 'left', 'right')def __init__(self, data):self.element = dataself.left = Noneself.right = Nonedef __repr__(self):return '<_ExpTreeNode: {} {} {}>'.format(self.element, self.left, self.right)from queue import Queue class ExpressionTree:"""表达式树: 操作符存储在内节点操作数存储在叶子节点的二叉树。(符号树真难打出来)*/ \+ -/ \ / \9 3 8 4(9+3) * (8-4)Expression Tree Abstract Data Type,可以实现二元操作符ExpressionTree(expStr): user string as constructor paramevaluate(varDict): evaluates the expression and returns the numeric resulttoString(): constructs and retutns a string represention of the expressionUsage:vars = {'a': 5, 'b': 12}expTree = ExpressionTree("(a/(b-3))")print('The result = ', expTree.evaluate(vars))"""def __init__(self, expStr):self._expTree = Noneself._buildTree(expStr)def evaluate(self, varDict):return self._evalTree(self._expTree, varDict)def __str__(self):return self._buildString(self._expTree)def _buildString(self, treeNode):""" 在一个子树被遍历之前添加做括号,在子树被遍历之后添加右括号 """# print(treeNode)if treeNode.left is None and treeNode.right is None:return str(treeNode.element) # 叶子节点是操作数直接返回else:expStr = '('expStr += self._buildString(treeNode.left)expStr += str(treeNode.element)expStr += self._buildString(treeNode.right)expStr += ')'return expStrdef _evalTree(self, subtree, varDict):# 是不是叶子节点, 是的话说明是操作数,直接返回if subtree.left is None and subtree.right is None:# 操作数是合法数字吗if subtree.element >= '0' and subtree.element <= '9':return int(subtree.element)else: # 操作数是个变量assert subtree.element in varDict, 'invalid variable.'return varDict[subtree.element]else: # 操作符则计算其子表达式lvalue = self._evalTree(subtree.left, varDict)rvalue = self._evalTree(subtree.right, varDict)print(subtree.element)return self._computeOp(lvalue, subtree.element, rvalue)def _computeOp(self, left, op, right):assert opop_func = {'+': lambda left, right: left + right, # or import operator, operator.add'-': lambda left, right: left - right,'*': lambda left, right: left * right,'/': lambda left, right: left / right,'%': lambda left, right: left % right,}return op_func[op](left, right)def _buildTree(self, expStr):expQ = Queue()for token in expStr: # 遍历表达式字符串的每个字符expQ.put(token)self._expTree = _ExpTreeNode(None) # 创建root节点self._recBuildTree(self._expTree, expQ)def _recBuildTree(self, curNode, expQ):token = expQ.get()if token == '(':curNode.left = _ExpTreeNode(None)self._recBuildTree(curNode.left, expQ)# next token will be an operator: + = * / %curNode.element = expQ.get()curNode.right = _ExpTreeNode(None)self._recBuildTree(curNode.right, expQ)# the next token will be ')', remmove itexpQ.get()else: # the token is a digit that has to be converted to an int.curNode.element = tokenvars = {'a': 5, 'b': 12} expTree = ExpressionTree("((2*7)+8)") print(expTree) print('The result = ', expTree.evaluate(vars))
Heap(堆):二叉树最直接的一个应用就是实现堆。堆就是一颗完全二叉树,最大堆的非叶子节点的值都比孩子大,最小堆的非叶子结点的值都比孩子小。 python内置了heapq模块帮助我们实现堆操作,比如用内置的heapq模块实现个堆排序:
# 使用python内置的heapq实现heap sort def heapsort(iterable):from heapq import heappush, heappoph = []for value in iterable:heappush(h, value)return [heappop(h) for i in range(len(h))]
但是一般实现堆的时候实际上并不是用数节点来实现的,而是使用数组实现,效率比较高。为什么可以用数组实现呢?因为完全二叉树的性质, 可以用下标之间的关系表示节点之间的关系,MaxHeap的docstring中已经说明了
class MaxHeap:"""Heaps:完全二叉树,最大堆的非叶子节点的值都比孩子大,最小堆的非叶子结点的值都比孩子小Heap包含两个属性,order property 和 shape property(a complete binary tree),在插入一个新节点的时候,始终要保持这两个属性插入操作:保持堆属性和完全二叉树属性, sift-up 操作维持堆属性extract操作:只获取根节点数据,并把树最底层最右节点copy到根节点后,sift-down操作维持堆属性用数组实现heap,从根节点开始,从上往下从左到右给每个节点编号,则根据完全二叉树的性质,给定一个节点i, 其父亲和孩子节点的编号分别是:parent = (i-1) // 2left = 2 * i + 1rgiht = 2 * i + 2使用数组实现堆一方面效率更高,节省树节点的内存占用,一方面还可以避免复杂的指针操作,减少调试难度。"""def __init__(self, maxSize):self._elements = Array(maxSize) # 第二章实现的Array ADTself._count = 0def __len__(self):return self._countdef capacity(self):return len(self._elements)def add(self, value):assert self._count < self.capacity(), 'can not add to full heap'self._elements[self._count] = valueself._count += 1self._siftUp(self._count - 1)self.assert_keep_heap() # 确定每一步add操作都保持堆属性def extract(self):assert self._count > 0, 'can not extract from an empty heap'value = self._elements[0] # save root valueself._count -= 1self._elements[0] = self._elements[self._count] # 最右下的节点放到root后siftDownself._siftDown(0)self.assert_keep_heap()return valuedef _siftUp(self, ndx):if ndx > 0:parent = (ndx - 1) // 2# print(ndx, parent)if self._elements[ndx] > self._elements[parent]: # swapself._elements[ndx], self._elements[parent] = self._elements[parent], self._elements[ndx]self._siftUp(parent) # 递归def _siftDown(self, ndx):left = 2 * ndx + 1right = 2 * ndx + 2# determine which node contains the larger valuelargest = ndxif (left < self._count andself._elements[left] >= self._elements[largest] andself._elements[left] >= self._elements[right]): # 原书这个地方没写实际上找的未必是largestlargest = leftelif right < self._count and self._elements[right] >= self._elements[largest]:largest = rightif largest != ndx:self._elements[ndx], self._elements[largest] = self._elements[largest], self._elements[ndx]self._siftDown(largest)def __repr__(self):return ' '.join(map(str, self._elements))def assert_keep_heap(self):""" 我加了这个函数是用来验证每次add或者extract之后,仍保持最大堆的性质"""_len = len(self)for i in range(0, int((_len-1)/2)): # 内部节点(非叶子结点)l = 2 * i + 1r = 2 * i + 2if l < _len and r < _len:assert self._elements[i] >= self._elements[l] and self._elements[i] >= self._elements[r]def test_MaxHeap():""" 最大堆实现的单元测试用例 """_len = 10h = MaxHeap(_len)for i in range(_len):h.add(i)h.assert_keep_heap()for i in range(_len):# 确定每次出来的都是最大的数字,添加的时候是从小到大添加的assert h.extract() == _len-i-1test_MaxHeap()def simpleHeapSort(theSeq):""" 用自己实现的MaxHeap实现堆排序,直接修改原数组实现inplace排序"""if not theSeq:return theSeq_len = len(theSeq)heap = MaxHeap(_len)for i in theSeq:heap.add(i)for i in reversed(range(_len)):theSeq[i] = heap.extract()return theSeqdef test_simpleHeapSort():""" 用一些测试用例证明实现的堆排序是可以工作的 """def _is_sorted(seq):for i in range(len(seq)-1):if seq[i] > seq[i+1]:return Falsereturn Truefrom random import randintassert simpleHeapSort([]) == []for i in range(1000):_len = randint(1, 100)to_sort = []for i in range(_len):to_sort.append(randint(0, 100))simpleHeapSort(to_sort) # 注意这里用了原地排序,直接更改了数组assert _is_sorted(to_sort)test_simpleHeapSort()
14章: Search Trees
二叉差找树性质:对每个内部节点V, 1. 所有key小于V.key的存储在V的左子树。 2. 所有key大于V.key的存储在V的右子树 对BST进行中序遍历会得到升序的key序列
class _BSTMapNode:__slots__ = ('key', 'value', 'left', 'right')def __init__(self, key, value):self.key = keyself.value = valueself.left = Noneself.right = Nonedef __repr__(self):return '<{}:{}> left:{}, right:{}'.format(self.key, self.value, self.left, self.right)__str__ = __repr__class BSTMap:""" BST,树节点包含key可payload。用BST来实现之前用hash实现过的Map ADT.性质:对每个内部节点V,1.对于节点V,所有key小于V.key的存储在V的左子树。2.所有key大于V.key的存储在V的右子树对BST进行中序遍历会得到升序的key序列"""def __init__(self):self._root = Noneself._size = 0self._rval = None # 作为remove的返回值def __len__(self):return self._sizedef __iter__(self):return _BSTMapIterator(self._root, self._size)def __contains__(self, key):return self._bstSearch(self._root, key) is not Nonedef valueOf(self, key):node = self._bstSearch(self._root, key)assert node is not None, 'Invalid map key.'return node.valuedef _bstSearch(self, subtree, target):if subtree is None: # 递归出口,遍历到树底没有找到key或是空树return Noneelif target < subtree.key:return self._bstSearch(subtree.left, target)elif target > subtree.key:return self._bstSearch(subtree.right, target)return subtree # 返回引用def _bstMinumum(self, subtree):""" 顺着树一直往左下角递归找就是最小的,向右下角递归就是最大的 """if subtree is None:return Noneelif subtree.left is None:return subtreeelse:return subtree._bstMinumum(self, subtree.left)def add(self, key, value):""" 添加或者替代一个key的value, O(N) """node = self._bstSearch(self._root, key)if node is not None: # if key already exists, update valuenode.value = valuereturn Falseelse: # insert a new entryself._root = self._bstInsert(self._root, key, value)self._size += 1return Truedef _bstInsert(self, subtree, key, value):""" 新的节点总是插入在树的叶子结点上 """if subtree is None:subtree = _BSTMapNode(key, value)elif key < subtree.key:subtree.left = self._bstInsert(subtree.left, key, value)elif key > subtree.key:subtree.right = self._bstInsert(subtree.right, key, value)# 注意这里没有else语句了,应为在被调用处add函数里先判断了是否有重复keyreturn subtreedef remove(self, key):""" O(N)被删除的节点分为三种:1.叶子结点:直接把其父亲指向该节点的指针置None2.该节点有一个孩子: 删除该节点后,父亲指向一个合适的该节点的孩子3.该节点有俩孩子:(1)找到要删除节点N和其后继S(中序遍历后该节点下一个)(2)复制S的key到N(3)从N的右子树中删除后继S(即在N的右子树中最小的)"""assert key in self, 'invalid map key'self._root = self._bstRemove(self._root, key)self._size -= 1return self._rvaldef _bstRemove(self, subtree, target):# search for the item in the treeif subtree is None:return subtreeelif target < subtree.key:subtree.left = self._bstRemove(subtree.left, target)return subtreeelif target > subtree.key:subtree.right = self._bstRemove(subtree.right, target)return subtreeelse: # found the node containing the itemself._rval = subtree.valueif subtree.left is None and subtree.right is None:# 叶子nodereturn Noneelif subtree.left is None or subtree.right is None:# 有一个孩子节点if subtree.left is not None:return subtree.leftelse:return subtree.rightelse: # 有俩孩子节点successor = self._bstMinumum(subtree.right)subtree.key = successor.keysubtree.value = successor.valuesubtree.right = self._bstRemove(subtree.right, successor.key)return subtreedef __repr__(self):return '->'.join([str(i) for i in self])def assert_keep_bst_property(self, subtree):""" 写这个函数为了验证add和delete操作始终维持了bst的性质 """if subtree is None:returnif subtree.left is not None and subtree.right is not None:assert subtree.left.value <= subtree.valueassert subtree.right.value >= subtree.valueself.assert_keep_bst_property(subtree.left)self.assert_keep_bst_property(subtree.right)elif subtree.left is None and subtree.right is not None:assert subtree.right.value >= subtree.valueself.assert_keep_bst_property(subtree.right)elif subtree.left is not None and subtree.right is None:assert subtree.left.value <= subtree.valueself.assert_keep_bst_property(subtree.left)class _BSTMapIterator:def __init__(self, root, size):self._theKeys = Array(size)self._curItem = 0self._bstTraversal(root)self._curItem = 0def __iter__(self):return selfdef __next__(self):if self._curItem < len(self._theKeys):key = self._theKeys[self._curItem]self._curItem += 1return keyelse:raise StopIterationdef _bstTraversal(self, subtree):if subtree is not None:self._bstTraversal(subtree.left)self._theKeys[self._curItem] = subtree.keyself._curItem += 1self._bstTraversal(subtree.right)def test_BSTMap():l = [60, 25, 100, 35, 17, 80]bst = BSTMap()for i in l:bst.add(i)def test_HashMap():""" 之前用来测试用hash实现的map,改为用BST实现的Map测试 """# h = HashMap()h = BSTMap()assert len(h) == 0h.add('a', 'a')assert h.valueOf('a') == 'a'assert len(h) == 1a_v = h.remove('a')assert a_v == 'a'assert len(h) == 0h.add('a', 'a')h.add('b', 'b')assert len(h) == 2assert h.valueOf('b') == 'b'b_v = h.remove('b')assert b_v == 'b'assert len(h) == 1h.remove('a')assert len(h) == 0_len = 10for i in range(_len):h.add(str(i), i)assert len(h) == _lenfor i in range(_len):assert str(i) in hfor i in range(_len):print(len(h))print('bef', h)_ = h.remove(str(i))assert _ == iprint('aft', h)print(len(h))assert len(h) == 0test_HashMap()