import collections

# Suffixes appended, in order, to the 1st/2nd/3rd occurrence of a repeated label.
_SUFFIXES = ("重量", "含量", "价格")


class headhandler():
    """Convert a sparse "label, value, label, value, ..." row into a dict.

    The row is stored in ``self.mylist`` and the resulting mapping is
    accumulated in ``self.mystorage``.
    """

    def __init__(self, mylist):
        """Store the row to process.

        :param mylist: list of cells; it is modified in place by ``delempty``
            and by the padding step of ``fillempty``.
        """
        self.mystorage = {}
        self.mylist = mylist

    def delempty(self):
        """Remove every empty-string cell from ``self.mylist`` in place.

        :return: None
        """
        # Slice assignment keeps the same list object, exactly like the
        # repeated ``list.remove("")`` it replaces, but in a single pass.
        self.mylist[:] = [cell for cell in self.mylist if cell != ""]

    def formatmydata(self, i):
        """Return ``i`` with every ":" removed when it is a string.

        :param i: a single cell
        :return: the cleaned string, or ``i`` unchanged for non-string cells
        """
        if isinstance(i, str):
            return i.replace(":", "")
        return i

    def fillempty(self):
        """Normalise a header row into strict label/value pairs.

        Only meant for header information.  Blank cells are dropped, then
        every cell at an even position is treated as a label: when the cell
        after it is a string, or does not exist, a ``0`` is inserted as its
        value.  Finally ":" is stripped from every string cell and the
        resulting list is printed.

        :return: None
        """
        self.delempty()
        cells = self.mylist
        # Walk by position rather than looking cells up with ``list.index``:
        # the list grows while we pad it, and ``index`` would return the
        # first equal cell instead of the one currently being examined.
        pos = 0
        while pos < len(cells):
            value_pos = pos + 1
            if value_pos >= len(cells) or isinstance(cells[value_pos], str):
                cells.insert(value_pos, 0)
            pos += 2
        # The colon-stripped row is a new list; the padded list above is the
        # caller's original object.
        self.mylist = [self.formatmydata(cell) for cell in cells]
        print(self.mylist)

    def turntodict(self):
        """Run ``fillempty`` and map each label to the cell that follows it.

        Pairs are taken by position (cells 0/1, 2/3, ...).  When a label
        occurs more than once, the value of its first occurrence is kept.

        :return: ``self.mystorage`` updated with the new pairs
        :raises IndexError: if the last label has no value after it (possible
            when ``fillempty`` is overridden and does not pad the row)
        """
        self.fillempty()
        cells = self.mylist
        pairs = {}
        for pos in range(0, len(cells), 2):
            pairs.setdefault(cells[pos], cells[pos + 1])
        self.mystorage.update(pairs)
        return self.mystorage

    def finalchart(self):
        """Drop blanks, strip ":" from string cells and return the dict.

        :return: the mapping produced by ``turntodict``
        """
        self.delempty()
        self.mylist = [self.formatmydata(cell) for cell in self.mylist]
        return self.turntodict()


class rowhandler(headhandler):
    """Variant of ``headhandler`` for rows whose labels repeat.

    Repeated labels are made unique by appending a suffix instead of padding
    missing values.
    """

    def fillempty(self):
        """Drop blank cells and disambiguate repeated string labels.

        The first, second and third occurrence of a repeated string get the
        suffixes "重量", "含量" and "价格" respectively; further occurrences
        are left unchanged.  Non-string cells are never modified, so repeated
        numeric values (for example several ``0.0`` cells) are safe.

        :return: ``self.mylist`` (the same list object, modified in place)
        """
        self.delempty()
        positions = collections.defaultdict(list)
        for idx, cell in enumerate(self.mylist):
            if isinstance(cell, str):
                positions[cell].append(idx)
        for found in positions.values():
            if len(found) > 1:
                # zip stops at the shorter input, so a label seen only twice
                # gets the first two suffixes instead of raising IndexError.
                for idx, suffix in zip(found, _SUFFIXES):
                    self.mylist[idx] += suffix
        return self.mylist


# Sample header row as read from the sheet:
# mylist = ['采购日期:', '', 43495.0, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '索赔金额:', '', '', '', '', '', 0.0, '', '']

# Demo: deleting dict entries while looping is safe when iterating over a
# snapshot of the keys.
mydict = {'a': [1, 2], 'b': [2, 3, 4]}
for i in list(mydict.keys()):
    print(mydict[i])
    if len(mydict[i]) > 2:
        del mydict[i]
print(mydict)
输出结果:
[1, 2] [2, 3, 4] {'a': [1, 2]}
import collections

from anewclass import *

# Suffixes appended, in order, to the 1st/2nd/3rd occurrence of a repeated
# column name.
_SUFFIXES = ("重量", "含量", "价格")


class docgen:
    """Build row dictionaries from a table given as a list of rows.

    The first row holds the column names.  Every following row whose first
    cell is non-empty is a regular row and is zipped with the column names;
    rows whose first cell is the empty string are irregular rows and are
    converted with ``rowhandler``.
    """

    def __init__(self, mylist):
        """Split the table into the column-name row and the data rows.

        :param mylist: list of rows; ``mylist[0]`` is the column-name row
        """
        self.mxrows = mylist[1::]
        self.columnline = mylist[0]
        self.addlist = []  # dicts built from irregular rows
        self.mxlist = []   # dicts built from regular rows

    def addstring(self):
        """Make repeated column names unique by appending a suffix.

        The first, second and third occurrence of a repeated name get the
        suffixes "重量", "含量" and "价格"; further occurrences are left
        unchanged.  ``self.columnline`` is modified in place.

        :return: ``self.columnline``
        """
        positions = collections.defaultdict(list)
        for idx, name in enumerate(self.columnline):
            if isinstance(name, str):
                positions[name].append(idx)
        for found in positions.values():
            if len(found) > 1:
                # zip stops at the shorter input, so a name that appears only
                # twice no longer raises IndexError.
                for idx, suffix in zip(found, _SUFFIXES):
                    self.columnline[idx] += suffix
        return self.columnline

    def genmx(self):
        """Rebuild ``self.mxlist`` and ``self.addlist`` from the data rows.

        Both result lists are rebuilt from scratch and the input rows are
        left untouched, so calling this method (or ``returnall``) more than
        once yields the same result each time.

        :return: None
        """
        self.addstring()
        self.mxlist = []
        self.addlist = []
        for row in self.mxrows:
            if not row:
                continue  # nothing to classify in an empty row
            if row[0] == "":
                # rowhandler edits its list in place; hand it a copy so the
                # row is still recognised as irregular on a later call.
                self.addlist.append(rowhandler(list(row)).turntodict())
            else:
                # Column names are already unique after addstring(); zip by
                # position so blank header cells keep their alignment.
                self.mxlist.append(dict(zip(self.columnline, row)))

    def returnall(self):
        """Process the table and return both result lists.

        :return: ``{'mx': regular row dicts, 'others': irregular row dicts}``
        """
        self.genmx()
        return {'mx': self.mxlist, 'others': self.addlist}


wuwa = [
    ['品名', '采购价', '每吨成本', '重量', '货品总成本', '铜重量', '铝重量', '片重量', '无限长', '锄头马', '铁重量', '铜含量', '铝含量', '片含量', '无限长',
     '锄头马', '铁含量', '铜价格', '铝价格', '片价格', '无限长', '锄头马', '铁价格', '产值', '每吨毛利', '货品赢利'],
    ['铜芯', 0.72, 11956.0, 19.617, 234540.852, 4.665, 0.068, 4.706, 0.506, 1.386, 1.63, 0.23780394555742468, 0.0034663811999796094,
     0.23989396951623593, 0.025793954223377682, 0.07065300504664321, 0.08309119641127592, 39200.0, 7000.0, 5050.0, 4500.0, 2750.0, 1800.0,
     11791.65009940358, -164.3499005964204, -3224.051999999979],
    ['', '', '', '', '', '', '23尖角', 1.157, '35尖角', 1.766, '', '', '23尖角', 0.058979456593770706, '35尖角', 0.09002395881123515, '', '',
     '23尖角', 5000.0, '35尖角', 3500.0, '', '', '', ''],
    ['', '', '', '', '', '', '35平角', 1.073, '', '', '', '', '35平角', 0.05469745628791354, '', '', '', '', '35平角', 3000.0, '', '', '', '',
     '', ''],
]
saiwa = docgen(wuwa)
result = saiwa.returnall()
print("===============mx===================")
for i in result['mx']:
    print(i)
print("===============others===================")
for i in result['others']:
    print(i)
输出结果:
[1, 2] [2, 3, 4] {'a': [1, 2]} ===============mx=================== {'品名': '铜芯', '采购价': 0.72, '每吨成本': 11956.0, '重量': 19.617, '货品总成本': 234540.852, '铜重量': 4.665, '铝重量': 0.068, '片重量': 4.706,
'无限长重量': 0.506, '锄头马重量': 1.386, '铁重量': 1.63, '铜含量': 0.23780394555742468, '铝含量': 0.0034663811999796094, '片含量':
0.23989396951623593, '无限长含量': 0.025793954223377682, '锄头马含量': 0.07065300504664321, '铁含量': 0.08309119641127592, '铜价格': 39200.0,
'铝价格': 7000.0, '片价格': 5050.0, '无限长价格': 4500.0, '锄头马价格': 2750.0, '铁价格': 1800.0, '产值': 11791.65009940358, '每吨毛利':
-164.3499005964204, '货品赢利': -3224.051999999979} ===============others=================== {'23尖角重量': 1.157, '35尖角重量': 1.766, '23尖角含量': 0.058979456593770706, '35尖角含量': 0.09002395881123515, '23尖角价格': 5000.0,
'35尖角价格': 3500.0} {'35平角重量': 1.073, '35平角含量': 0.05469745628791354, '35平角价格': 3000.0}
# NOTE(review): the visible excerpt shows no import lines for this script.
# ``collections`` is imported here; ``pandas`` and ``xlrd`` are imported inside
# the functions that use them -- confirm against the real file header.
import collections

# Suffixes appended, in order, to the 1st/2nd/3rd occurrence of a repeated name.
_SUFFIXES = ("重量", "含量", "价格")


def readexcel(path):
    """Print a few facts about the first sheet of a workbook read by pandas.

    :param path: path passed to ``pandas.read_excel``
    :return: None
    """
    import pandas as pd

    datablock = pd.read_excel(path, sheet_name=0)
    print(len(datablock))
    wenwa = datablock.head(2)
    print(type(wenwa.index))
    print(datablock.index.__dict__)
    print("columns", datablock.columns[0])
    print("columns", datablock.head(2).columns)


def loadexcel(path):
    """Return every row of the first sheet of a workbook opened with xlrd.

    The number of rows is printed as well.

    :param path: path passed to ``xlrd.open_workbook``
    :return: list with one list of cell values per row
    """
    import xlrd

    workbook = xlrd.open_workbook(path)
    sheet = workbook.sheet_by_index(0)
    # Rows 2 and 3 used to be read into unused variables here, which raised
    # on sheets with fewer than four rows.
    allrets = [sheet.row_values(rowx) for rowx in range(sheet.nrows)]
    print(sheet.nrows)
    return allrets


def mergerows(mylist):
    """Split the sheet rows into a detail part and a header part.

    Every row is printed.  The last row containing the cell '每吨人工:'
    marks the start of the header part; its position is printed too.

    :param mylist: list of rows (each row a list of cells)
    :return: dict with ``"mx"`` (rows from index 2 up to, but excluding, the
        row just before the marker row) and ``"header"`` (the marker row and
        everything after it).  When no row contains the marker, ``"mx"`` is
        every row from index 2 on and ``"header"`` is empty.
    """
    marker = '每吨人工:'
    splitline = None
    # enumerate gives the real position; list.index would return the first
    # row that merely compares equal to the current one.
    for idx, row in enumerate(mylist):
        print(row)
        if marker in row:
            print('in: ', idx)
            splitline = idx
    if splitline is None:
        return {"mx": mylist[2:], "header": []}
    # max() keeps the stop index from going negative when the marker is in
    # the very first row.
    return {"mx": mylist[2:max(splitline - 1, 0)], "header": mylist[splitline:]}


def dealmx(mylist):
    """Placeholder: inspects the first cell of a row but does nothing yet.

    :param mylist: a non-empty row
    :return: None
    """
    if mylist[0] == '':
        pass


def addstring(mylist):
    """Make repeated names in ``mylist`` unique by appending a suffix.

    The first, second and third occurrence of a repeated string get the
    suffixes "重量", "含量" and "价格"; further occurrences are left
    unchanged.  The list is modified in place.

    :param mylist: list of column names
    :return: the same list object
    """
    positions = collections.defaultdict(list)
    for idx, name in enumerate(mylist):
        if isinstance(name, str):
            positions[name].append(idx)
    for found in positions.values():
        if len(found) > 1:
            # zip stops at the shorter input, so a name that appears only
            # twice no longer raises IndexError.
            for idx, suffix in zip(found, _SUFFIXES):
                mylist[idx] += suffix
    return mylist


duwa = loadexcel('火烧片 2. MSCU3272441 铜芯.csv')
doc = mergerows(duwa)
for i in doc['header']:
    print(i)
print("==================mx=============================")
for i in doc['mx']:
    print(i)

mylist1 = ['品名', '采购价', '每吨成本', '重量', '货品总成本', '铜重量', '铝重量', '片重量', '无限长', '锄头马', '铁重量', '铜含量', '铝含量', '片含量',
           '无限长', '锄头马', '铁含量', '铜价格', '铝价格', '片价格', '无限长', '锄头马', '铁价格', '产值', '每吨毛利', '货品赢利']
mylist2 = ['铜芯', 0.72, 11956.0, 19.617, 234540.852, 4.665, 0.068, 4.706, 0.506, 1.386, 1.63, 0.23780394555742468, 0.0034663811999796094,
           0.23989396951623593, 0.025793954223377682, 0.07065300504664321, 0.08309119641127592, 39200.0, 7000.0, 5050.0, 4500.0, 2750.0, 1800.0,
           11791.65009940358, -164.3499005964204, -3224.051999999979]
# Zipping before the names are made unique loses the repeated columns.
print(dict(zip(mylist1, mylist2)))
print(collections.Counter(mylist1))
print(mylist1.index('无限长'))
print(addstring(mylist1))
mycounter = collections.Counter(mylist1)
print(dict(mycounter))
输出结果:
12 ['火烧片', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] ['品名', '采购价', '每吨成本', '重量', '货品总成本', '铜重量', '铝重量', '片重量', '无限长', '锄头马', '铁重量', '铜含量', '铝含量', '片含量', '无限长',
'锄头马', '铁含量', '铜价格', '铝价格', '片价格', '无限长', '锄头马', '铁价格', '产值', '每吨毛利', '货品赢利'] ['铜芯', 0.72, 11956.0, 19.617, 234540.852, 4.665, 0.068, 4.706, 0.506, 1.386, 1.63, 0.23780394555742468, 0.0034663811999796094,
0.23989396951623593, 0.025793954223377682, 0.07065300504664321, 0.08309119641127592, 39200.0, 7000.0, 5050.0, 4500.0, 2750.0, 1800.0,
11791.65009940358, -164.3499005964204, -3224.051999999979] ['', '', '', '', '', '', '23尖角', 1.157, '35尖角', 1.766, '', '', '23尖角', 0.058979456593770706, '35尖角', 0.09002395881123515, '', '',
'23尖角', 5000.0, '35尖角', 3500.0, '', '', '', ''] ['', '', '', '', '', '', '35平角', 1.073, '', '', '', '', '35平角', 0.05469745628791354, '', '', '', '', '35平角', 3000.0, '', '', '', '', '',
''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '每吨人工:', '', '', '', '', '总人工', 0.0, '', ''] in: 7 ['采购日期:', '', 43495.0, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '索赔金额:', '', '', '', '', '', 0.0, '', ''] ['计算日期:', '', 43594.0, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '总成本:', '', '', '', '', '', 234540.852, '', ''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '总利润:', '', '', '', '', '', -3224.051999999979, '', ''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '利润百分比:', '', '', '', '', '', -0.013746227885281063, '', ''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '每吨人工:', '', '', '', '', '总人工', 0.0, '', ''] ['采购日期:', '', 43495.0, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '索赔金额:', '', '', '', '', '', 0.0, '', ''] ['计算日期:', '', 43594.0, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '总成本:', '', '', '', '', '', 234540.852, '', ''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '总利润:', '', '', '', '', '', -3224.051999999979, '', ''] ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '利润百分比:', '', '', '', '', '', -0.013746227885281063, '', ''] ==================mx============================= ['品名', '采购价', '每吨成本', '重量', '货品总成本', '铜重量', '铝重量', '片重量', '无限长', '锄头马', '铁重量', '铜含量', '铝含量', '片含量', '无限长',
'锄头马', '铁含量', '铜价格', '铝价格', '片价格', '无限长', '锄头马', '铁价格', '产值', '每吨毛利', '货品赢利'] ['铜芯', 0.72, 11956.0, 19.617, 234540.852, 4.665, 0.068, 4.706, 0.506, 1.386, 1.63, 0.23780394555742468, 0.0034663811999796094,
0.23989396951623593, 0.025793954223377682, 0.07065300504664321, 0.08309119641127592, 39200.0, 7000.0, 5050.0, 4500.0, 2750.0, 1800.0,
11791.65009940358, -164.3499005964204, -3224.051999999979]
['', '', '', '', '', '', '23尖角', 1.157, '35尖角', 1.766, '', '', '23尖角', 0.058979456593770706, '35尖角', 0.09002395881123515, '', '',
'23尖角', 5000.0, '35尖角', 3500.0, '', '', '', '']
['', '', '', '', '', '', '35平角', 1.073, '', '', '', '', '35平角', 0.05469745628791354, '', '', '', '', '35平角', 3000.0, '', '', '', '', '',
'']
{'品名': '铜芯', '采购价': 0.72, '每吨成本': 11956.0, '重量': 19.617, '货品总成本': 234540.852, '铜重量': 4.665, '铝重量': 0.068, '片重量': 4.706,
'无限长': 4500.0, '锄头马': 2750.0, '铁重量': 1.63, '铜含量': 0.23780394555742468, '铝含量': 0.0034663811999796094, '片含量': 0.23989396951623593,
'铁含量': 0.08309119641127592,
'铜价格': 39200.0, '铝价格': 7000.0, '片价格': 5050.0, '铁价格': 1800.0, '产值': 11791.65009940358, '每吨毛利': -164.3499005964204, '货品赢利':
-3224.051999999979}
Counter({'无限长': 3, '锄头马': 3, '品名': 1, '采购价': 1, '每吨成本': 1, '重量': 1, '货品总成本': 1, '铜重量': 1, '铝重量': 1, '片重量': 1,
'铁重量': 1, '铜含量': 1, '铝含量': 1, '片含量': 1, '铁含量': 1, '铜价格': 1, '铝价格': 1, '片价格': 1, '铁价格': 1, '产值': 1, '每吨毛利': 1,
'货品赢利': 1})
8 ['品名', '采购价', '每吨成本', '重量', '货品总成本', '铜重量', '铝重量', '片重量', '无限长重量', '锄头马重量', '铁重量', '铜含量', '铝含量', '片含量',
'无限长含量', '锄头马含量', '铁含量', '铜价格', '铝价格', '片价格', '无限长价格', '锄头马价格', '铁价格', '产值', '每吨毛利', '货品赢利'] {'品名': 1, '采购价': 1, '每吨成本': 1, '重量': 1, '货品总成本': 1, '铜重量': 1, '铝重量': 1, '片重量': 1, '无限长重量': 1, '锄头马重量': 1,
'铁重量': 1, '铜含量': 1, '铝含量': 1, '片含量': 1, '无限长含量': 1, '锄头马含量': 1, '铁含量': 1, '铜价格': 1, '铝价格': 1, '片价格': 1,
'无限长价格': 1, '锄头马价格': 1, '铁价格': 1, '产值': 1, '每吨毛利': 1, '货品赢利': 1}