手头有一个表格,需要对其中的数据做具体分析,得到在一定条件下的统计结果,并以比较美观的形式可视化出来。
下面的脚本涉及以下几点:
- 修改列名,并用 Excel 中的列字母(A、B、C……)来索引各列
- 剔除第一行,并去掉没有打分的行
- 对所选的列做分组统计(均值和标准差),并导出为样式更美观的 HTML 表格
需要说明的是,脚本最后定义的两个函数(mean 和 std)其实只是为了让结果表格中的列名更好看。
# The wildcard import is kept first so that the explicit imports below take
# precedence over any same-named symbols it may bring in.
from chj.comm.pic import *

import csv
import string

import pandas as pd
# Excel-style column letters A..N, used as short handles for the 14 columns.
brief = list(string.ascii_uppercase[:14])
# Swap in more meaningful column names here if desired; by default the
# letters themselves are used (this is the same list object as `brief`).
columns = brief
# Maps each Excel-style letter to the column name actually used in the frame.
mp_brief = dict(zip(brief, columns))

with open('aimotion.csv', 'r', newline='') as file:
    reader = csv.DictReader(file, fieldnames=columns)
    # Skip the first row of the file; the default keeps an empty file from
    # raising StopIteration.
    next(reader, None)
    data = list(reader)

# `columns=` selects exactly the named columns. Surplus fields in a row
# (DictReader collects them under the key None) are discarded, so the frame
# gets the expected labels whether or not any row carries extra fields.
# The earlier `df.columns = columns + [None]` raised a length-mismatch error
# whenever no row had a surplus field.
df = pd.DataFrame(data, columns=columns)
# Values of column B whose rows are excluded from the analysis. Add further
# values (e.g. "talk_zxf_schooldiff") to exclude more rows.
del_nms = ["talk_slow_nosense"]
# `.copy()` makes the filtered frame independent, so the column assignment
# below does not write into a slice (avoids pandas' SettingWithCopyWarning).
df = df[~df[mp_brief['B']].isin(del_nms)].copy()

# Columns F, G, H and I are the ones that must hold numbers; resolve the
# Excel-style letters to the actual column names.
cols = [mp_brief[letter] for letter in ('F', 'G', 'H', 'I')]
print(df[cols])
# Convert the selected columns to numbers; values that cannot be parsed
# become NaN instead of raising.
df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
# Keep only the rows that have a value in every selected column.
df = df.dropna(subset=cols)


def mean(x):
    """Return the mean of *x*, rounded to three decimal places."""
    average = x.mean()
    return round(average, 3)


def std(x):
    """Return the standard deviation of *x*, rounded to three decimal places."""
    spread = x.std()
    return round(spread, 3)


# Named functions (rather than lambdas) are used so that pandas labels the
# aggregated columns "mean" and "std".
custom_agg = [mean, std]
# Aggregate the selected columns per value of column D, then turn the group
# key back into an ordinary column.
grouped = df.groupby(mp_brief['D'])[cols].agg(custom_agg).reset_index()
print(grouped)
# Render the aggregated table as an HTML <table> carrying the CSS class that
# the stylesheet below targets.
html_table = grouped.to_html(classes='styled-table', justify='center', border=0)
# Prepend a charset declaration and the styling rules. The charset matches
# the encoding used when writing the file, so non-ASCII labels display
# correctly regardless of the platform's default encoding.
html_table = (
    '<meta charset="utf-8">'
    "<style>.styled-table {border-collapse: collapse; margin: 20px;} "
    ".styled-table td, .styled-table th "
    "{border: 1px solid black; padding: 8px; text-align: center;}</style>"
    f"{html_table}"
)
with open('output_table.html', 'w', encoding='utf-8') as file:
    file.write(html_table)
print("HTML table exported and saved as output_table.html")
生成的网页