背景知识
双人纳什均衡求解问题,假设:
- 玩家 0 0 0,行动 { 0 , ⋯ , m − 1 } \{0, \cdots, m-1\} {0,⋯,m−1},收益 A ∈ R + m × n A \in \reals_{+}^{m \times n} A∈R+m×n,策略 p ∈ R + m × 1 p \in \reals_{+}^{m \times 1} p∈R+m×1。
- 玩家 1 1 1,行动 { m , ⋯ , m + n − 1 } \{m, \cdots, m+n-1\} {m,⋯,m+n−1},收益 B ∈ R + m × n B \in \reals_{+}^{m \times n} B∈R+m×n,策略 q ∈ R + n × 1 q \in \reals_{+}^{n \times 1} q∈R+n×1。
最优反应条件:
- A q ∈ { 0 , u } m Aq \in \{0, u\}^{m} Aq∈{0,u}m
- B T p ∈ { 0 , v } n B^Tp \in \{0, v\}^{n} BTp∈{0,v}n
polyhedron:
- P † : { ( p , v ) ∣ 1 T p = 1 , p ≽ 0 , B T p ≼ v } P^{\dag}: \{ (p,v) \mid 1^Tp = 1, p \succcurlyeq 0, B^Tp \preccurlyeq v \} P†:{(p,v)∣1Tp=1,p≽0,BTp≼v}
- Q † : { ( q , u ) ∣ 1 T q = 1 , q ≽ 0 , A q ≼ u } Q^{\dag}: \{ (q,u) \mid 1^Tq = 1, q \succcurlyeq 0, Aq \preccurlyeq u \} Q†:{(q,u)∣1Tq=1,q≽0,Aq≼u}
polytope:
- P : { p ∣ p ≽ 0 , B T p ≼ 1 } P: \{ p \mid p \succcurlyeq 0, B^Tp \preccurlyeq 1 \} P:{p∣p≽0,BTp≼1}
- Q : { q ∣ q ≽ 0 , A q ≼ 1 } Q: \{ q \mid q \succcurlyeq 0, Aq \preccurlyeq 1 \} Q:{q∣q≽0,Aq≼1}
只需求解原点以外的顶点。
算法概述
单独求解 P P P的顶点和 Q Q Q的顶点,然后匹配二者的顶点,根据:
- ( A q ) i < 1 (Aq)_i < 1 (Aq)i<1当且仅当 p i = 0 p_i = 0 pi=0。
- ( B T p ) i < 1 (B^Tp)_i < 1 (BTp)i<1当且仅当 q j = 0 q_j = 0 qj=0。
拓展延申
混合策略 k k k个分量非零称作 k k k支撑。
假设问题非退化,对于任意策略,如果策略是 k k k支撑,那么策略最优反应是 k k k支撑。
如果问题非退化,对于任意纳什均衡,如果 p p p是 k k k支撑,那么 q q q是 k k k支撑,如果 q q q是 k k k支撑,那么 p p p是 k k k支撑。
复杂度:
- support enumeration: O ( 4 min { m , n } ) \Omicron\left(4^{\min\{m,n\}}\right) O(4min{m,n})
- vertex enumeration: O ( 2. 6 min { m , n } ) \Omicron\left(2.6^{\min\{m,n\}}\right) O(2.6min{m,n})(这是高维多面体的性质)
代码
数据格式
http://www.gambit-project.org/gambit14/formats.html
import re
import os
import sys
import timeimport nash2
import nash3if os.name == 'nt':os.system('color')def load_nfg(nfg_path):nfg_comment_list = []nfg_player_list = []nfg_action_list = []nfg_payoff_list = []with open(nfg_path, 'rt') as nfg_file:first_line = nfg_file.readline()assert first_line.startswith('NFG 1 R')first_line = first_line.lstrip('NFG 1 R').lstrip()while not first_line.count('"') >= 1:first_line = nfg_file.readline().rstrip('\n')comment_line = first_line.lstrip('"')comment_line = comment_line.replace('\\"', '\\x22')while not '"' in comment_line:nfg_comment_list.append(comment_line)comment_line = nfg_file.readline().rstrip('\n')comment_line = comment_line.replace('\\"', '\\x22')meta_line = comment_line[comment_line.find('"') + 1 :]while not (meta_line.count('{') >= 2 and meta_line.count('}') >= 2):meta_line += nfg_file.readline().rstrip('\n')string_pattern = re.compile(r'\{([^\}]+)\}')player_pattern = re.compile(r'"([^"]+)"')action_pattern = re.compile(r'([0-9]+)')meta_string = string_pattern.findall(meta_line)player_string = meta_string[0]action_string = meta_string[1]for player in player_pattern.findall(player_string):nfg_player_list.append(player)for action in action_pattern.findall(action_string):nfg_action_list.append(int(action))payoff_line = Nonewhile not payoff_line:payoff_line = nfg_file.readline().rstrip('\n')for payoff in payoff_line.strip().split():nfg_payoff_list.append(int(payoff))return len(nfg_player_list), nfg_action_list, nfg_payoff_listdef dump_ne(ne_path, ne_list):with open(ne_path, 'wt') as ne_file:for ne_record in ne_list:ne_file.write(repr(tuple(ne_record)).lstrip('(').rstrip(')'))ne_file.write('\n')def solve_ne(nfg_path, ne_path):player_number, action_list, payoff_list = load_nfg(nfg_path)if player_number == 2:ne_list = nash2.solve_ne_two_mixed(action_list, payoff_list)else:ne_list = nash3.solve_ne_many_pure(player_number, action_list, payoff_list)dump_ne(ne_path, ne_list)def load_ne(ne_path):ne_list = []component_pattern = re.compile(r'[^,]+')with open(ne_path, 'rt') as ne_file:while (ne_line := ne_file.readline().rstrip('\n').lstrip('[').rstrip(']')):ne_record = []for component_string in component_pattern.findall(ne_line):ne_component = eval(component_string)if abs(ne_component) < 1e-8:ne_component = 0.0ne_record.append(ne_component)ne_list.append(tuple(ne_record))ne_list.sort()return ne_listdef compare_ne(ne_path, ne_tgt_path):ne_list = load_ne(ne_path)ne_tgt_list = load_ne(ne_tgt_path)if len(ne_list) == len(ne_tgt_list):for ne_record, ne_tgt_record in zip(ne_list, ne_tgt_list):for ne_component, ne_tgt_component in zip(ne_record, ne_tgt_record):if abs(ne_component - ne_tgt_component) > 1e-8:return Falsereturn Truedef log_one(ansi, stem, comment):print('{} \033[{}m[{}]\033[0m {}'.format(time.strftime('%H:%M:%S'), ansi, stem, comment))def deploy_all(nfg_dir, ne_dir, stem_list):for stem in stem_list:timestamp_before = time.time()solve_ne(os.path.join(nfg_dir, '{}.nfg'.format(stem)),os.path.join(ne_dir, '{}.ne'.format(stem)),)timestamp_after = time.time()ansi = 93log_one(ansi,stem,'solve_ne in {:.2f}s'.format(timestamp_after - timestamp_before),)def difftest_all(ne_dir, ne_tgt_dir, stem_list):for stem in stem_list:is_same = compare_ne(os.path.join(ne_dir, '{}.ne'.format(stem)),os.path.join(ne_tgt_dir, '{}.ne'.format(stem)),)if is_same:ansi = 92else:ansi = 91log_one(ansi,stem,'compare_ne as {:s}'.format('good' if is_same else 'bad'),)def deploy():curr_dir, _ = os.path.split(__file__)nfg_dir = os.path.join(curr_dir, 'input')ne_dir = os.path.join(curr_dir, 'output')stem_list = []for nfg_name in os.listdir(nfg_dir):if nfg_name.endswith('.nfg'):stem_list.append(nfg_name.rstrip('.nfg'))deploy_all(nfg_dir, ne_dir, stem_list)def difftest():curr_dir, _ = os.path.split(__file__)nfg_dir = os.path.join(curr_dir, 'example_input')ne_dir = os.path.join(curr_dir, 'example_output')ne_tgt_dir = os.path.join(curr_dir, 'example_input')stem_list = []for nfg_name in os.listdir(nfg_dir):if nfg_name.endswith('.nfg'):stem_list.append(nfg_name.rstrip('.nfg'))stem_list.sort()deploy_all(nfg_dir, ne_dir, stem_list)difftest_all(ne_dir, ne_tgt_dir, stem_list)if __name__ == '__main__':supported_routines = {'deploy': deploy, 'difftest': difftest}if len(sys.argv) > 1 and (routine := sys.argv[1]) in supported_routines:supported_routines[routine]()else:difftest()deploy()
双人 混合策略 纳什均衡
import numpy as npfrom scipy.optimize import linprog
from scipy.spatial import HalfspaceIntersectiondef get_bimatrix(action_list, payoff_list):m, n = action_listA = [[None for j in range(n)] for i in range(m)]B = [[None for j in range(n)] for i in range(m)]for j in range(n):for i in range(m):A[i][j] = payoff_list[(i + j * m) * 2 + 0]B[i][j] = payoff_list[(i + j * m) * 2 + 1]A = np.array(A)B = np.array(B)A -= A.min()B -= B.min()return m, n, A, Bdef get_polytope(mat):num, dim = mat.shapeempathy_polyhedron = np.concatenate((mat, -np.ones((num, 1))), axis=1)sanity_polyhedron = np.concatenate((-np.identity(dim), np.zeros((dim, 1))), axis=1)return np.concatenate((empathy_polyhedron, sanity_polyhedron), axis=0)def get_feasible(mat):num, dim = mat.shapeA_ub = np.concatenate((np.concatenate((mat, -np.identity(dim)), axis=0),np.concatenate((np.linalg.norm(mat, axis=1, keepdims=True), np.ones((dim, 1))),axis=0,),),axis=1,)b_ub = np.concatenate((np.ones((num, 1)), np.zeros((dim, 1))), axis=0)c = np.zeros((dim + 1,))c[dim] = -1result = linprog(c=c, A_ub=A_ub, b_ub=b_ub, A_eq=None, b_eq=None, bounds=(0, None))assert result.status == 0return result.x[:-1]def get_hyperplanes(polytope, vertex):A_slack = polytope[:, :-1]b_slack = polytope[:, -1:](hyperplanes,) = np.nonzero(np.isclose(np.ravel(A_slack @ vertex.reshape(-1, 1) + b_slack), 0))return hyperplanes.tolist()def get_vertecies(mat):polytope = get_polytope(mat)feasible = get_feasible(mat)intersection = HalfspaceIntersection(polytope, feasible)vertecies = []for vertex in intersection.intersections:if vertex.max() < np.inf and not np.all(np.isclose(vertex, 0)):vertecies.append((vertex, get_hyperplanes(polytope, vertex)))return verteciesdef solve_ne_two_mixed(action_list, payoff_list):'''算法:多面体顶点。'''m, n, A, B = get_bimatrix(action_list, payoff_list)q_vertecies = get_vertecies(A)p_vertecies = get_vertecies(B.T)mixed_list = []for p_vertex, m_hyperplanes in p_vertecies:for q_vertex, n_hyperplanes in q_vertecies:m_labels = set(m_hyperplanes)n_labels = set(map(lambda i: (i + n) % (m + n), n_hyperplanes))if len(m_labels | n_labels) == m + n:p = p_vertex / p_vertex.sum()q = q_vertex / q_vertex.sum()mixed = p.tolist() + q.tolist()mixed_list.append(mixed)return mixed_list
多人 纯策略 纳什均衡
def get_state_number(action_list):state_number = 1for action_number in action_list:state_number *= action_numberreturn state_numberdef get_state_list(player_number, action_list):state_number = get_state_number(action_list)state_list = []state = [0 for _ in range(player_number)]for state_index in range(state_number):state_list.append([element for element in state])for player in range(player_number):state[player] += 1if state[player] < action_list[player]:breakelse:state[player] = 0return state_listdef get_player_state_list(player_number, action_list, player):player_state_number = get_state_number(action_list) // action_list[player]player_state_list = []player_state = [0 for _ in range(player_number)]player_state[player] = Nonefor player_state_index in range(player_state_number):player_state_list.append([element for element in player_state])for enemy in range(player_number):if enemy != player:player_state[enemy] += 1if player_state[enemy] < action_list[enemy]:breakelse:player_state[enemy] = 0else:continuereturn player_state_listdef solve_ne_many_pure(player_number, action_list, payoff_list):'''算法:共同最优反应。'''state_payoff = {}state_response = {}for state_index, state in enumerate(get_state_list(player_number, action_list)):state_payoff[tuple(state)] = payoff_list[player_number * state_index : player_number * (state_index + 1)]state_response[tuple(state)] = [False for _ in range(player_number)]for player in range(player_number):for player_state in get_player_state_list(player_number, action_list, player):player_state_best_payoff = -float('inf')for player_choice in range(action_list[player]):player_state[player] = player_choiceif (player_state_best_payoff< state_payoff[tuple(player_state)][player]):player_state_best_payoff = state_payoff[tuple(player_state)][player]player_state[player] = Nonefor player_choice in range(action_list[player]):player_state[player] = player_choiceif (player_state_best_payoff<= state_payoff[tuple(player_state)][player]):state_response[tuple(player_state)][player] = Trueplayer_state[player] = Nonepure_list = []for state in get_state_list(player_number, action_list):if all(state_response[tuple(state)]):pure = []for player in range(player_number):player_pure = [0 for _ in range(action_list[player])]player_pure[state[player]] = 1pure += player_purepure_list.append(pure)return pure_list