3.6,3.7pandas合并concat&merge
头文件:
import pandas as pd
import numpy as np
concat基础合并用法
# Three 3x4 frames sharing the same column labels, filled with 0, 1 and 2.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])

# axis=0 stacks the frames vertically; ignore_index=True renumbers the
# rows of the result instead of repeating each frame's 0..2 index.
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)

# Redefine df1/df2 so their column labels only partially overlap
# (df1 has a-d, df2 has b-e).
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'])
concat中join用法
# join='outer' keeps every column label and fills the cells a frame does
# not provide with NaN.
# join='inner' drops the column labels that are not shared by all frames.
res = pd.concat(
    [df1, df2],
    join='outer',
    ignore_index=True,
)
join_axes (新版本删除了)
join_axes= [df1.index]设置合并后按照df1的索引进行保留
# res = pd.concat([df1,df2],axis=1,join_axes = [df1.index])  # 旧写法,join_axes参数在pandas 1.0中已被删除
# 新版本的等价写法: res = pd.concat([df1,df2],axis=1).reindex(df1.index)
append添加数据
# Add whole frames underneath df1.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the
# frames top to bottom in the same way.
res = pd.concat([df1, df2, df3], ignore_index=True)

# Add a single row.
s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
# The Series is turned into a one-row frame first so that it is added
# as a row (matched on column labels) rather than as a new column.
res = pd.concat([df1, s1.to_frame().T], ignore_index=True)
merge按照key合并
# Two frames that share the 'key' column and carry different value columns.
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})

# Join the rows of both frames whose 'key' values match.
res = left.merge(right, on='key')
print(res)
merge考虑两个key,根据两个key列的取值进行数据合并
# Merge on two key columns. Both frames need 'key1' and 'key2' columns
# for this, so sample frames carrying them are defined here.
left = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K2'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})

# The default how='inner' keeps only the (key1, key2) pairs present in
# both frames.
res = pd.merge(left, right, on=['key1', 'key2'])
# how='outer' keeps every pair from either frame and fills the missing
# side with NaN.
res = pd.merge(left, right, on=['key1', 'key2'], how='outer')
merge中indicator
indicator=True时,合并结果会多出一列_merge,标明每一行的来源:left_only(只在左表中出现)、right_only(只在右表中出现)或both(两表中都有)
# Outer merge on the 'key1'/'key2' columns (the second label was
# misspelled 'ke2'); left and right must both carry these two columns.
# indicator=True adds a '_merge' column telling which frame(s) each row
# came from.
res = pd.merge(left, right, on=['key1', 'key2'], how='outer', indicator=True)
merge中left_index和right_index
根据数据索引进行结合
# Join on the row index of both frames instead of on a column.
# The how value must use plain ASCII quotes; typographic quotes are a
# syntax error in Python.
res = pd.merge(left, right, left_index=True, right_index=True, how='outer')
merge中suffixes
suffixes来区分标签相同但是数值不同的数据
# Both frames have an 'age' column with different values.
boys = pd.DataFrame({'K': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'K': ['K0', 'K1', 'K2'], 'age': [4, 5, 6]})

# suffixes renames the clashing 'age' columns so the result keeps both,
# as 'age_boy' (from boys) and 'age_girl' (from girls).
res = pd.merge(boys, girls, on='K', suffixes=('_boy', '_girl'), how='inner')
3.8pandas plot画图
头文件:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
Series的数据(类似数组)
# 1000 random normal samples indexed 0..999, replaced by their running
# total so the plot shows a cumulative curve.
data = pd.Series(
    np.random.randn(1000),
    index=np.arange(1000),
).cumsum()

data.plot()
plt.show()
DataFrame数据(类似矩阵)
折线图:
# A 1000x4 frame of random normal samples with columns A-D, accumulated
# down each column; plot() draws one line per column.
data = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=list("ABCD"),
).cumsum()

data.plot()
plt.show()
散点图
# First scatter series (A against B); the returned axes object is kept
# so the second series can be drawn on the same axes.
ax = data.plot.scatter(
    x='A',
    y='B',
    color='DarkBlue',
    label='Class 1',
)
# Second scatter series (A against C), drawn onto the axes created above.
data.plot.scatter(
    x='A',
    y='C',
    color='DarkGreen',
    label='Class 2',
    ax=ax,
)
plt.show()