搜索热词
下面是编程之家 jb51.cc 通过网络收集整理的代码片段。
编程之家小编现在分享给大家,也给大家做个参考。
#coding=utf-8
__author__ = 'zhm'
from win32com import client as wc
import os
import time
import random
import MysqLdb
import re
def wordsToHtml(dir):#批量把文件夹的word文档转换成html文件
#金山WPS调用,抢先版的用KWPS,正式版WPS
word = wc.Dispatch('KWPS.Application')
for path,subdirs,files in os.walk(dir):
for wordFile in files:
wordFullName = os.path.join(path,wordFile)
#print "word:" + wordFullName
doc = word.Documents.Open(wordFullName)
wordFile2 = unicode(wordFile,"gbk")
dotIndex = wordFile2.rfind(".")
if(dotIndex == -1):
print '********************ERROR: 未取得后缀名!'
fileSuffix = wordFile2[(dotIndex + 1) : ]
if(fileSuffix == "doc" or fileSuffix == "docx"):
fileName = wordFile2[ : dotIndex]
htmlName = fileName + ".html"
htmlFullName = os.path.join(unicode(path,"gbk"),htmlName)
# htmlFullName = unicode(path,"gbk") + "\\" + htmlName
print u'生成了html文件:' + htmlFullName
doc.SaveAs(htmlFullName,8)
doc.Close()
word.Quit()
print ""
print "Finished!"
def html_add_to_db(dir):#将转换成功的html文件批量插入数据库中。
conn = MysqLdb.connect(
host='localhost',port=3306,user='root',passwd='root',db='test',charset='utf8'
)
cur = conn.cursor()
for path,files in os.walk(dir):
for htmlFile in files:
htmlFullName = os.path.join(path,htmlFile)
title = os.path.splitext(htmlFile)[0]
targetDir = 'D:/files/htmls/' #D:/files为web服务器配置的静态目录
sconds = time.time()
msconds = sconds * 1000
targetFile = os.path.join(targetDir,str(int(msconds))+str(random.randint(100,10000)) +'.html')
htmlFile2 = unicode(htmlFile,"gbk")
dotIndex = htmlFile2.rfind(".")
if(dotIndex == -1):
print '********************ERROR: 未取得后缀名!'
fileSuffix = htmlFile2[(dotIndex + 1) : ]
if(fileSuffix == "htm" or fileSuffix == "html"):
if not os.path.exists(targetDir):
os.makedirs(targetDir)
htmlFullName = os.path.join(unicode(path,htmlFullName)
htFile = open(htmlFullName,'rb')
#获取网页内容
htmStrCotent = htFile.read()
#找出里面的图片
img=re.compile(r"""""",re.I)
m = img.findall(htmStrCotent)
for tagContent in m:
imgSrc = unicode(tagContent,"gbk")
imgSrcFullName = os.path.join(path,imgSrc)
#上传图片
imgTarget = 'D:/files/images/whzx/'
img_sconds = time.time()
img_msconds = sconds * 1000
targetImgFile = os.path.join(imgTarget,str(int(img_msconds))+str(random.randint(100,10000)) +'.png')
if not os.path.exists(imgTarget):
os.makedirs(imgTarget)
if not os.path.exists(targetImgFile) or(os.path.exists(targetImgFile) and (os.path.getsize(targetImgFile) != os.path.getsize(imgSrcFullName))):
tmpImgFile = open(imgSrcFullName,'rb')
tmpWriteImgFile = open(targetImgFile,"wb")
tmpWriteImgFile.write(tmpImgFile.read())
tmpImgFile.close()
tmpWriteImgFile.close()
htmStrCotent=htmStrCotent.replace(tagContent,targetImgFile.split(":")[1])
if not os.path.exists(targetFile) or(os.path.exists(targetFile) and (os.path.getsize(targetFile) != os.path.getsize(htmlFullName))):
#用iframe包装转换好的html文件。
iframeHtml='''
function iFrameHeight() {
var ifm= document.getElementById("iframepage");
var subWeb = document.frames ? document.frames["iframepage"].document:ifm.contentDocument;
if(ifm != null && subWeb != null) {
ifm.height = subWeb.body.scrollHeight;
}
}
marginheight="0" marginwidth="0" frameborder="0" scrolling="no" width="765" height=100% id="iframepage" name="iframepage" onLoad="iFrameHeight()" >
'''
tmpTargetFile = open(targetFile,"wb")
tmpTargetFile.write(htmStrCotent)
tmpTargetFile.close()
htFile.close()
try:
# 执行
sql = "insert into common_article(title,content) values(%s,%s)"
param = (unicode(title,iframeHtml)
cur.execute(sql,param)
except:
print "Error: unable to insert data"
cur.close()
conn.commit()
# 关闭数据库连接
conn.close()
if __name__ == '__main__':
wordsToHtml('d:/word')
html_add_to_db('d:/word')
以上是编程之家(jb51.cc)为你收集整理的全部代码内容,希望文章能够帮你解决所遇到的程序开发问题。
如果觉得编程之家网站内容还不错,欢迎将编程之家网站推荐给程序员好友。
总结
如果觉得编程之家网站内容还不错,欢迎将编程之家网站推荐给程序员好友。
本图文内容来源于网友网络收集整理提供,作为学习参考使用,版权属于原作者。