1.读取excel文件
1.安装工具
终端下载读取excel文档的工具库:
pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple xlrd
若对版本有特殊需求:
删除当前版本:pip3 uninstall xlrd
下载所需要的版本:pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple xlrd==1.2.0
2.打开目录中的excel文件
import xlrd
excel = xlrd.open_workbook('question.xlsx')
print(excel)
3.搜索excel文件页签
import xlrd
excel = xlrd.open_workbook('question.xlsx')
# 根据页签名找到页签
book1 = excel.sheet_by_name('题库')
print(book1.name)
# 获取第一个页签
book2 = excel.sheet_by_index(0)
print(book2.name)
# 遍历所有页签
for i in excel.sheets():print(i.name)
4. 获得表格中的数据
import xlrd
excel = xlrd.open_workbook('question.xlsx')
book = excel.sheet_by_name('题库')
for i in book.get_rows():content = []for j in i:content.append(j.value)print(content)
2.新建excel文件
1.安装工具
终端下载创建excel文档的工具库(支持写入文本、数字、公式等):
pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple xlsxwriter
2.创建excel文档
import xlsxwriter
# 生成excel
excel = xlsxwriter.Workbook('study.xlsx')
# 添加工作簿
book = excel.add_worksheet('考试成绩')
title = ['姓名','性别','年龄','成绩','等级']
data = [['Adela','woman','20','90','1'],['Zoya','woman','24','95','2']
]
for index,item in enumerate(title):# 向表格中写入内容,参数1:行索引;参数2:列索引;参数3:内容book.write(0,index,item)
for row_index, row_data in enumerate(data):for col_index, item in enumerate(row_data):book.write(row_index + 1, col_index, item) # 从第 1 行开始写入数据
excel.close()
3.复制excel文档
代码思路:读取+新建+写入
import xlsxwriter
import xlrd
def read():result = []excel = xlrd.open_workbook('study.xlsx')book = excel.sheet_by_name('考试成绩')for i in book.get_rows():content = []for j in i:content.append(j.value) # 行中的每一个格子里的数据,并封装到content列表中,作为一行数据result.append(content) # 将每一个行的数据追加到返回结果集列表中return resultdef write(content):excel = xlsxwriter.Workbook('study_copy.xlsx')book = excel.add_worksheet('考试成绩')for index,data in enumerate(content):for sub_index,sub_data in enumerate(data):book.write(index,sub_index,sub_data)excel.close()
if __name__ == '__main__':result = read()write(result)
4.生成excel并附带图表
import xlsxwriter
excel = xlsxwriter.Workbook('study1.xlsx')
book = excel.add_worksheet('成绩等级')
data = [['优秀','良好','中等','一般'],[1100,2000,1000,500]
]
book.write_column('A1',data[0])
book.write_column('B1',data[1])
# 绘制图表
# 参数1:图表类型 type声明设置类型,column柱状图
chart = excel.add_chart({'type':'column'})
# 定义要展示的数据
chart.add_series({'categories':'=成绩等级!$A1:$A4','values':'=成绩等级!$B1:$B4','name':'成绩占比'})
# 设置图表标题
chart.set_title({'name':'成绩占比图'})
# 向表格中插入图表
# 参数1:位置;参数2:图表的引用名
book.insert_chart('A10',chart)
excel.close()
3.读取并计算数据
1.安装工具
终端下载操作excel文档的工具库(大多用于读取、写入和修改等 ):
pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple xlutils
2.读取数据
编程思路:
1.打开 Excel 文件;
2.获取第一个工作表;
3.初始化两个数据:分别是用于存储按分类汇总的总价goods_type
与每行的单品总价count_price
;
4.用遍历对每一行计算单品总价
5.最后返回goods_type
和
count_price
def read_data():wb = xlrd.open_workbook('goods.xlsx')sh = wb.sheet_by_index(0)goods_type = {} # 分类总价 {水果:20,生活用品:30}count_price = [] # 单品总价 [10,20,30]for r in range(sh.nrows):# sh.cell_value(r,4)获得格子中的数据,参数1行索引值,参数2列索引值count = sh.cell_value(r,3)*sh.cell_value(r,4)#将计算的小计值追加到单品总价中count_price.append(count)key = sh.cell_value(r,0)# 判断分类总价中是否包含该商品,如包含则追加计算if goods_type.get(key):goods_type[key] += countelse:goods_type[key] = countreturn goods_type,count_price
3.保存数据
def save(_type,_count):wb = xlrd.open_workbook('goods.xlsx')sh_t = wb.sheet_by_index(0)wb2 = copy(wb) # 复制当前的商品表格sh = wb2.get_sheet(0)for r in range(sh_t.nrows):sh.write(r,sh_t.ncols,_count[r])sh2 = wb2.add_sheet('汇总的数据')for i,key in enumerate(_type.keys()):sh2.write(i,0,key)sh2.write(i,1,_type[key])wb2.save('商品汇总数据.xlsx')
4.主程序
read_data():读取并自动匹配汇总数据和单品总价
save():将结果保存到新的 Excel 文件中
if __name__ == '__main__':t,c = read_data()save(t,c)
4. 拆分Excel文件
将Excel表格中的数据按照A1列分类,并按类别存到不同的sheet页中
import xlrd
from xlutils.copy import copydef get_data():wb = xlrd.open_workbook('goods.xlsx')sh = wb.sheet_by_index(0)"""{a:[{},{},{}],b:[{},{},{}],c:[{},{},{}],}"""all_data = {}for r in range(sh.nrows):d = {'type':sh.cell_value(r,1),'name':sh.cell_value(r,2),'price':sh.cell_value(r,3),'count':sh.cell_value(r,4)}key = sh.cell_value(r,0)if all_data.get(key):all_data[key].append(d)else:all_data[key] = [d]return all_datadef save(data):wb = xlrd.open_workbook('goods.xlsx')wb2 = copy(wb)for key in data.keys():temp_sheet = wb2.add_sheet(key)for i,d in enumerate(data[key]):temp_sheet.write(i, 0, d.get('type'))temp_sheet.write(i, 1, d.get('name'))temp_sheet.write(i, 2, d.get('price'))temp_sheet.write(i, 3, d.get('count'))wb2.save('表格拆分.xlsx')
if __name__ == '__main__':all_data = get_data()save(all_data)
5. 合并Excel文件
终端下载操作excel文档的工具库(除了读取写入,还包括格式化单元格、插入图表、处理多个工作表等功能,功能比xlutils强大,支持大数据量的处理 ):
pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple openpyxl
1.合并至同一sheet页
from openpyxl import load_workbook,Workbook
import os
def copy_data():wb = Workbook() # 获得excel对象sh = wb.active # 获得焦点sheet页all_data = []for name in os.listdir('./销售表'):path = f'./销售表/{name}'tmp_wb = load_workbook(path)tmp_sh = tmp_wb.activefor r in range(1,tmp_sh.max_row+1):# 获取整行数据row_value = []for c in range(1,tmp_sh.max_column+1):value = tmp_sh.cell(r,c).valuerow_value.append(value)# 将获取的行数据,添加到全局数据里if row_value not in all_data:all_data.append(row_value)# 将数据追加至新的excel中for data in all_data:sh.append(data)wb.save('合并Excel.xlsx')
if __name__ == '__main__':copy_data()
2.合并至多个sheet页
from openpyxl import load_workbook,Workbook
import os
def copy_data():wb = Workbook()for name in os.listdir('./销售表'):path = f'./销售表/{name}'tmp_wb = load_workbook(path)tmp_sh = tmp_wb.active# 创建sheet页sh = wb.create_sheet(name[:-5]) # 单肩包.xlsx->单肩包for r in range(1,tmp_sh.max_row+1):row_value = []for c in range(1,tmp_sh.max_column+1):value = tmp_sh.cell(r,c).valuerow_value.append(value)sh.append(row_value)del wb['Sheet']wb.save('合并至多个sheet页.xlsx')
if __name__ == '__main__':copy_data()
6. 生成工资条
省脑细胞式描述:遍历原始工作表的每一行,再单独存到一个新Excel表中
from openpyxl import load_workbook,Workbook
def create_excel():wb = load_workbook('工资数据.xlsx')sh = wb.activetitle = ["工号”,“姓名","部门", "基本工资","提成","加班工资","社保扣除","考勤扣除","应付工资","邮箱"]for i,row in enumerate(sh.rows):if i == 0:continueelse:temp_wbook = Workbook()temp_sh = temp_wbook.activetemp_sh.append(title)row_data = [cell.value for cell in row] #['5732','Adela','研发部','8000',......]获得当前行的数据temp_sh.append(row_data) # 将当前行数据追加直sheet页中temp_wbook.save(f'./工资条/{row_data[1]}.xlsx')
if __name__ == '__main__':create_excel()
当然你可以再把他们合并成一起当作练习,并在新的sheet页中显示工资数的柱状图:
import xlsxwriter
from openpyxl import load_workbook, Workbook
import osdef combine_data():wb = Workbook()sh = wb.activeall_data = []for name in os.listdir('./工资条'):path = f'./工资条/{name}'tmp_wb = load_workbook(path)tmp_sh = tmp_wb.activefor r in range(1, tmp_sh.max_row + 1):row_value = []for c in range(1, tmp_sh.max_column + 1):value = tmp_sh.cell(r, c).valuerow_value.append(value)if row_value not in all_data:all_data.append(row_value)for data in all_data:sh.append(data)wb.save('合并工资条.xlsx')return all_datadef create_chart(data):# 创建新Excelwb = xlsxwriter.Workbook('图表.xlsx')ws = wb.add_worksheet()# 写入数据for row_num, row in enumerate(data):for col_num, value in enumerate(row):ws.write(row_num, col_num, value)# 创建柱状图(假设第2列是姓名,第8列是工资)chart = wb.add_chart({'type': 'column'})chart.add_series({'categories': f'=Sheet1!$B$1:$B${len(data)}','values': f'=Sheet1!$H$1:$H${len(data)}'})# 插入图表ws.insert_chart('K2', chart)wb.close()if __name__ == '__main__':data = combine_data() # 合并数据create_chart(data) # 生成图表
7.统计加班时间
1.搭建一个数据表格
from datetime import date
from openpyxl import load_workbook,Workbook
def create_data():wb =Workbook()sh = wb.activerows = [['Date','姓名','打卡时间'],[date(2025, 4, 2), '张三', '18:50'],[date(2025, 4, 2), '李四', '19:15'],[date(2025, 4, 4), '王五', '22:30'],[date(2025, 4, 5), '赵六', '18:45'],[date(2025, 4, 6), '钱七', '17:20'],[date(2025, 4, 7), '孙八', '19:05'],[date(2025, 4, 8), '周九', '21:10'],[date(2025, 4, 9), '吴十', '22:35'],[date(2025, 4, 2), '李四', '19:15'],]for row in rows:sh.append(row)wb.save('打卡时间.xlsx')
2.处理表格数据
def statistics():wb = load_workbook('打卡时间.xlsx')sh = wb.activedata = []for i in range(2, sh.max_row + 1):t_data = []for j in range(1, sh.max_column + 1):t_data.append(sh.cell(i, j).value)# 统计计算h, m = t_data[2].split(":")full = int(h) * 60 + int(m) # 打卡时间转换成分钟tmp = full - 18 * 60 # 与18:00的差值t_data.append(tmp)t_data[0] = t_data[0].date()data.append(t_data)n_wb = Workbook()n_sh = n_wb.activefor d in data:n_sh.append(d)n_wb.save('统计时间.xlsx')
3.主程序
if __name__ == '__main__':
statistics()
create_data()
8.查重复数据
查询是否有重复的人名
from openpyxl import load_workbook,Workbook
from openpyxl.styles import PatternFill
import os
def dum():wb = load_workbook('打卡时间.xlsx')sh = wb.activeindex = [] #存放重复数据的索引值tmp = []for i,c in enumerate(sh['B']):flag = c.value not in tmpif flag:tmp.append(c.value)else:index.append(i)fill = PatternFill('solid',fgColor='AEEEEE')for i,r in enumerate(sh.rows):if i in index:for c in r:c.fill = fillprint(f'第{i+1}条数据是重复数据')wb.save('查重复数据.xlsx')
if __name__ == '__main__':dum()
9.提取身份证号信息
根据身份证号码的格式,提取出生辰年月,并进行年龄等数学运算
from openpyxl import load_workbook
from datetime import datetime
def get_code():now_year = datetime.now().yearwb = load_workbook('身份证信息.xlsx')sh = wb.activemax_colunm = sh.max_columnfor i,cell in enumerate(sh['B']):pno = cell.value# 6位行政区号 4位年份 2位月份 2月日期 4位个人识别码year = pno[6:10]month = pno[10:12]day = pno[12:14]age = now_year - int(year)sh.cell(i + 1, max_colunm + 1).value = yearsh.cell(i + 1, max_colunm + 2).value = monthsh.cell(i + 1, max_colunm + 3).value = daysh.cell(i + 1, max_colunm + 4).value = agewb.save('提取身份证号信息.xlsx')
if __name__ == '__main__':get_code()