import matplotlib.pyplot as plt
from pandas import read_excel
import numpy as np
import tushare as ts
import pandas as pd
import datetime
# Tushare Pro API token. It is an empty string here, so a real token has to be
# filled in before the download sections below can be expected to work.
token=''
# Tushare Pro client used by the download sections below.
pro=ts.pro_api(token)
# --- Fetch fundamental (valuation) data ---
# Stock pool: Tushare codes of the tickers to download.
ticker_list = ['601318.SH','601336.SH','601398.SH','601888.SH','603993.SH']
for ticker in ticker_list:
    # Per-day indicator table restricted to the requested fields
    # (total_mv, circ_mv, pe, pb, keyed by ts_code / trade_date).
    df = pro.daily_basic(ts_code=ticker, fields='ts_code,trade_date,total_mv,circ_mv,pe,pb')
    # Daily price table for the same ticker.
    df1 = pro.daily(ts_code=ticker)
    # NOTE(review): both writes assume the stock_data/ directory already
    # exists -- confirm or create it before running.
    df1.to_excel('stock_data/' + ticker + '.xlsx')
    df.to_excel('stock_data/' + ticker + '_basic.xlsx')
# --- Fetch stock price data ---
# Stock pool: Tushare codes of the tickers to download.
ticker_list = ['601318.SH','601336.SH','601398.SH','601888.SH','603993.SH']
for ticker in ticker_list:
    # Download the daily price table and store it as stock_data/<ticker>.xlsx,
    # replacing any file already saved under that name.
    df = pro.daily(ts_code=ticker)
    df.to_excel('stock_data/' + ticker + '.xlsx')
# --- Define the stock pool and merge price data with fundamentals ---
ticker_list = ['601318.SH','601336.SH','601398.SH','601888.SH','603993.SH']
# For every ticker, load the saved price and fundamentals workbooks and join them.
for ticker in ticker_list:
    df1 = pd.read_excel('stock_data/' + ticker + '.xlsx', engine='openpyxl')
    df2 = pd.read_excel('stock_data/' + ticker + '_basic.xlsx', engine='openpyxl')
    # Keep only the join key and the valuation columns from the fundamentals.
    df2 = df2[['trade_date','pe','pb','total_mv','circ_mv']]
    # Default inner join: only dates present in both tables survive.
    df3 = pd.merge(df1, df2, on='trade_date')
    # Overwrite the price workbook with the merged table.
    df3.to_excel('stock_data/' + ticker + '.xlsx')
# Empty frame that will hold one close-price column per ticker.
StockPrices = pd.DataFrame()
# Mean total market value of each ticker, in ticker_list order.
market_value_list = []
ticker_list = ['601318.SH','601336.SH','601398.SH','601888.SH','603993.SH']
for ticker in ticker_list:
    df = read_excel('stock_data/' + ticker + '.xlsx', engine='openpyxl')
    # Convert trade_date from YYYYMMDD to a YYYY-MM-DD string.
    df['trade_date'] = df.trade_date.apply(
        lambda x: datetime.datetime.strptime(str(x), "%Y%m%d").strftime("%Y-%m-%d")
    )
    # Sort chronologically (oldest first) so that pct_change() below yields
    # forward-in-time returns.
    df = df.sort_values('trade_date', ascending=True)
    stock_data = df.set_index('trade_date')
    # Optional date window, disabled in the original script:
    # stock_data = stock_data.loc['2016-03-01':'2017-12-29']
    # Collect this ticker's closing prices as a column.
    StockPrices[ticker] = stock_data['close']
    # Record the ticker's average total market value.
    market_value_list.append(stock_data['total_mv'].mean())
StockPrices.index.name = 'trade_date'
print(StockPrices.head())
# Period-over-period returns; rows containing missing values are dropped.
StockReturns = StockPrices.pct_change().dropna()
# Show the first five rows.
print(StockReturns.head())
# --- Weighted portfolio return ---
# Keep a copy holding only the per-stock return columns; StockReturns itself
# gains a portfolio column below, which must not enter the weighting.
stock_return = StockReturns.copy()
# Portfolio weights as a numpy array, one entry per stock column, in order.
portfolio_weights = np.array([0.32,0.15,0.10,0.18,0.25])
# Scale every stock's return column by its weight.
WeightedReturns = stock_return.mul(portfolio_weights, axis=1)
# The portfolio return is the row-wise sum of the weighted returns.
StockReturns['Portfolio'] = WeightedReturns.sum(axis=1)
# Plot the portfolio return over time.
StockReturns.Portfolio.plot()
plt.show()
# --- Cumulative return curve of the portfolio ---
def cumulative_returns_plot(name_list):
    """Plot the cumulative return curve of each named StockReturns column.

    All curves are drawn on the same axes, labelled by column name, and the
    figure is shown once at the end.

    Args:
        name_list: Column names of the module-level ``StockReturns`` frame.
    """
    for name in name_list:
        # Compound the per-period returns: prod(1 + r) - 1.
        CumlativeReturns = ((1 + StockReturns[name]).cumprod() - 1)
        CumlativeReturns.plot(label=name)
    plt.legend()
    plt.show()


# Plot the cumulative return of the weighted portfolio.
cumulative_returns_plot(['Portfolio'])
# --- Equal-weight portfolio ---
# Number of stocks in the portfolio, taken from the stock pool so the weight
# vector always matches the list of tickers.
numstocks = len(ticker_list)
# Give every stock the same weight, 1 / numstocks.
portfolio_weights_ew = np.repeat(1/numstocks, numstocks)
# Return of the equal-weight portfolio.
StockReturns['Portfolio_EW'] = stock_return.mul(portfolio_weights_ew, axis=1).sum(axis=1)
# Show the first five rows.
print(StockReturns.head())
# Compare cumulative return curves.
cumulative_returns_plot(['Portfolio','Portfolio_EW'])
# --- Market-value-weighted portfolio ---
# Turn the per-stock average market values collected earlier into an array.
market_values = np.array(market_value_list)
# Each stock's weight is its share of the summed market value.
market_weights = market_values / np.sum(market_values)
# Return of the market-value-weighted portfolio.
StockReturns['Portfolio_MVal'] = stock_return.mul(market_weights, axis=1).sum(axis=1)
# Show the first five rows.
print(StockReturns.head())
# Compare cumulative return curves.
cumulative_returns_plot(['Portfolio','Portfolio_EW','Portfolio_MVal'])
# --- Correlation analysis of the portfolio ---
# The correlation matrix estimates the linear relationship between the
# returns of the individual stocks.
correlation_matrix = stock_return.corr()
# Print the correlation matrix.
print(correlation_matrix)
import seaborn as sns
# Draw it as an annotated heat map.
sns.heatmap(correlation_matrix, annot=True, cmap='rainbow', linewidths=1.0, annot_kws={'size':8})
plt.xticks(rotation=0)
plt.yticks(rotation=75)
plt.show()
# --- Covariance matrix of the portfolio ---
# Correlation only captures the linear relationship between stocks; the
# covariance matrix additionally carries how strongly each one fluctuates.
cov_mat = stock_return.cov()
# Annualise by scaling with 252 periods per year.
cov_mat_annual = cov_mat * 252
# Print the annualised covariance matrix.
print(cov_mat_annual)
sns.heatmap(cov_mat_annual, annot=True, cmap='rainbow', linewidths=1.0, annot_kws={'size':8})
# Show the heat map right away, as is done for the correlation heat map;
# otherwise it only appears together with the next figure that calls show().
plt.show()
# Portfolio standard deviation: sqrt(w^T * Sigma * w) with the annualised
# covariance matrix Sigma and the portfolio weights w.
portfolio_volatility = np.sqrt(np.dot(portfolio_weights.T, np.dot(cov_mat_annual, portfolio_weights)))
print(portfolio_volatility)
# --- Optimal portfolio search: Monte Carlo simulation of the Markowitz model ---
# Number of random portfolios to simulate.
number = 10000
# Number of assets, taken from the stock pool so the array layout, the random
# weights and the column names below always agree.
n_assets = len(ticker_list)
# One row per simulation: n_assets weights, then annual return, then volatility.
random_p = np.empty((number, n_assets + 2))
# Fix the random seed so the results are reproducible.
np.random.seed(7)
# Loop-invariant inputs. The mean of a weighted sum of return columns equals
# the weighted sum of the column means, so the per-stock means are computed
# once instead of re-weighting the whole return table in every iteration.
mean_daily_returns = stock_return.mean().values
cov_annual_values = np.asarray(cov_mat_annual)
for i in range(number):
    # Draw random numbers and normalise them into weights that sum to 1.
    random_draw = np.random.random(n_assets)
    random_weight = random_draw / np.sum(random_draw)
    # Mean per-period portfolio return, compounded over 252 periods.
    mean_return = np.dot(mean_daily_returns, random_weight)
    annual_return = (1 + mean_return) ** 252 - 1
    # Annualised standard deviation (volatility): sqrt(w^T * Sigma * w).
    random_volatility = np.sqrt(np.dot(random_weight.T, np.dot(cov_annual_values, random_weight)))
    # Store weights, return and volatility in their own columns. The return
    # goes into column n_assets; it must not overwrite the weight columns.
    random_p[i][:n_assets] = random_weight
    random_p[i][n_assets] = annual_return
    random_p[i][n_assets + 1] = random_volatility
# Convert the numpy array into a DataFrame.
RandomPortfolios = pd.DataFrame(random_p)
# Name the columns: one weight column per ticker, then return and volatility.
RandomPortfolios.columns = [ticker + '_weight' for ticker in ticker_list] + ['Returns','Volatility']
# Risk-return scatter plot of all simulated portfolios.
RandomPortfolios.plot('Volatility','Returns',kind='scatter',alpha=0.3)
plt.show()
# --- Minimum-risk portfolio ---
# Row label of the simulated portfolio with the smallest volatility.
min_index = RandomPortfolios.Volatility.idxmin()
# Highlight that portfolio on the risk-return scatter plot.
RandomPortfolios.plot('Volatility','Returns',kind='scatter',alpha=0.3)
x = RandomPortfolios.loc[min_index,'Volatility']
y = RandomPortfolios.loc[min_index,'Returns']
plt.scatter(x, y, color='red')
# Annotate the point with its coordinates to four decimals. The label is
# formatted explicitly so it does not depend on how numpy scalars print
# inside a tuple.
plt.text(np.round(x,4), np.round(y,4), f'({x:.4f}, {y:.4f})', ha='left', va='bottom', fontsize=10)
plt.show()
# Weights of the minimum-volatility portfolio as a numpy array.
GMV_weights = np.array(RandomPortfolios.iloc[min_index,0:numstocks])
# Return series of the GMV portfolio.
StockReturns['Portfolio_GMV'] = stock_return.mul(GMV_weights, axis=1).sum(axis=1)
# Print the weights of the minimum-risk portfolio.
print(GMV_weights)
# --- Maximum-risk portfolio ---
# Row label of the simulated portfolio with the largest volatility.
max_index = RandomPortfolios.Volatility.idxmax()
# Highlight that portfolio on the risk-return scatter plot.
RandomPortfolios.plot('Volatility','Returns',kind='scatter',alpha=0.3)
x = RandomPortfolios.loc[max_index,'Volatility']
y = RandomPortfolios.loc[max_index,'Returns']
plt.scatter(x, y, color='red')
# Annotate the point with its coordinates to four decimals. The label is
# formatted explicitly so it does not depend on how numpy scalars print
# inside a tuple.
plt.text(np.round(x,4), np.round(y,4), f'({x:.4f}, {y:.4f})', ha='left', va='bottom', fontsize=10)
plt.show()
# Weights of the maximum-volatility portfolio as a numpy array. They get
# their own names so the minimum-volatility results stored in GMV_weights and
# StockReturns['Portfolio_GMV'] are not overwritten by a different portfolio.
MaxVol_weights = np.array(RandomPortfolios.iloc[max_index,0:numstocks])
# Return series of the maximum-volatility portfolio.
StockReturns['Portfolio_MaxVol'] = stock_return.mul(MaxVol_weights, axis=1).sum(axis=1)
# Print the weights of the maximum-risk portfolio.
print(MaxVol_weights)
# --- Minimum-return portfolio ---
# Row label of the simulated portfolio with the smallest return.
min_index = RandomPortfolios.Returns.idxmin()
# Highlight that portfolio on the risk-return scatter plot.
RandomPortfolios.plot('Volatility','Returns',kind='scatter',alpha=0.3)
x = RandomPortfolios.loc[min_index,'Volatility']
y = RandomPortfolios.loc[min_index,'Returns']
plt.scatter(x, y, color='red')
# Annotate the point with its coordinates to four decimals. The label is
# formatted explicitly so it does not depend on how numpy scalars print
# inside a tuple.
plt.text(np.round(x,4), np.round(y,4), f'({x:.4f}, {y:.4f})', ha='left', va='bottom', fontsize=10)
plt.show()
# Weights of the minimum-return portfolio as a numpy array. They get their
# own names so the minimum-volatility results stored in GMV_weights and
# StockReturns['Portfolio_GMV'] are not overwritten by a different portfolio.
MinRet_weights = np.array(RandomPortfolios.iloc[min_index,0:numstocks])
# Return series of the minimum-return portfolio.
StockReturns['Portfolio_MinRet'] = stock_return.mul(MinRet_weights, axis=1).sum(axis=1)
# Print the weights of the minimum-return portfolio.
print(MinRet_weights)
# --- Maximum-return portfolio ---
# Row label of the simulated portfolio with the largest return.
max_index = RandomPortfolios.Returns.idxmax()
# Highlight that portfolio on the risk-return scatter plot.
RandomPortfolios.plot('Volatility','Returns',kind='scatter',alpha=0.3)
x = RandomPortfolios.loc[max_index,'Volatility']
y = RandomPortfolios.loc[max_index,'Returns']
plt.scatter(x, y, color='red')
# Annotate the point with its coordinates to four decimals. The label is
# formatted explicitly so it does not depend on how numpy scalars print
# inside a tuple.
plt.text(np.round(x,4), np.round(y,4), f'({x:.4f}, {y:.4f})', ha='left', va='bottom', fontsize=10)
plt.show()
# Weights of the maximum-return portfolio as a numpy array. They get their
# own names so the minimum-volatility results stored in GMV_weights and
# StockReturns['Portfolio_GMV'] are not overwritten by a different portfolio.
MaxRet_weights = np.array(RandomPortfolios.iloc[max_index,0:numstocks])
# Return series of the maximum-return portfolio.
StockReturns['Portfolio_MaxRet'] = stock_return.mul(MaxRet_weights, axis=1).sum(axis=1)
# Print the weights of the maximum-return portfolio.
print(MaxRet_weights)
# --- Optimal portfolio: selection by Sharpe ratio ---
# Risk-free rate of return, fixed at 0.03.
risk_free = 0.03
# Sharpe ratio of every simulated portfolio: return in excess of the
# risk-free rate, divided by volatility.
RandomPortfolios['Sharpe'] = RandomPortfolios.Returns.sub(risk_free).div(RandomPortfolios.Volatility)
# Return-vs-volatility scatter, with the Sharpe ratio mapped to colour.
plt.scatter(
    x=RandomPortfolios.Volatility,
    y=RandomPortfolios.Returns,
    c=RandomPortfolios.Sharpe,
)
plt.colorbar(label='Sharpe Ratio')
plt.show()
# Row label of the simulated portfolio with the largest Sharpe ratio.
max_index = RandomPortfolios.Sharpe.idxmax()
# Highlight that portfolio on the risk-return scatter plot.
RandomPortfolios.plot('Volatility', 'Returns', kind='scatter', alpha=0.3)
x = RandomPortfolios.loc[max_index,'Volatility']
y = RandomPortfolios.loc[max_index,'Returns']
plt.scatter(x, y, color='red')
# Annotate the point with its coordinates to four decimals. The label is
# formatted explicitly so it does not depend on how numpy scalars print
# inside a tuple.
plt.text(np.round(x,4), np.round(y,4), f'({x:.4f}, {y:.4f})', ha='left', va='bottom', fontsize=10)
plt.show()
# Weights of the maximum-Sharpe-ratio (MSR) portfolio as a numpy array.
MSR_weights = np.array(RandomPortfolios.iloc[max_index, 0:numstocks])
# Return series of the MSR portfolio.
StockReturns['Portfolio_MSR'] = stock_return.mul(MSR_weights, axis=1).sum(axis=1)
# Print the weights of the maximum-Sharpe-ratio portfolio.
print(MSR_weights)
# Row label of the simulated portfolio with the largest Sharpe ratio.
max_index = RandomPortfolios.Sharpe.idxmax()
# Highlight that portfolio on the Sharpe-ratio-vs-return scatter plot.
RandomPortfolios.plot('Sharpe', 'Returns', kind='scatter', alpha=0.3)
x = RandomPortfolios.loc[max_index,'Sharpe']
y = RandomPortfolios.loc[max_index,'Returns']
plt.scatter(x, y, color='red')
# Annotate the point with its coordinates to four decimals. The label is
# formatted explicitly so it does not depend on how numpy scalars print
# inside a tuple.
plt.text(np.round(x,4), np.round(y,4), f'({x:.4f}, {y:.4f})', ha='left', va='bottom', fontsize=10)
plt.show()
# Weights of the maximum-Sharpe-ratio (MSR) portfolio as a numpy array.
MSR_weights = np.array(RandomPortfolios.iloc[max_index, 0:numstocks])
# Return series of the MSR portfolio.
StockReturns['Portfolio_MSR'] = stock_return.mul(MSR_weights, axis=1).sum(axis=1)
# Print the weights of the maximum-Sharpe-ratio portfolio.
print(MSR_weights)