环境:Win7_x64 + python3.4.3
需要先下载chardet并进行安装,下载地址:https://pypi.python.org/packages/source/c/chardet/chardet-2.3.0.tar.gz
安装:进入解压后的目录,在命令窗口执行: python setup.py install
写个测试的python脚本吧(DetectURLCoding.py):
#coding:utf-8
'''python 3.x'''
import sys
import urllib.request
import chardet
def writeFile(fname, data):
    """Write *data* to the file *fname* in binary mode.

    The file is opened with mode ``"wb"``, so an existing file of the same
    name is truncated before writing.

    Args:
        fname: Path of the file to create or overwrite.
        data: Bytes-like object to write.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # The context manager closes the handle even if write() raises, which
    # the manual open()/close() pair did not guarantee.
    with open(fname, "wb") as f:
        f.write(data)
def blog_detect(blogurl):
    """Download the page at *blogurl* and print its detected encoding.

    The raw response bytes are passed to ``chardet.detect`` and the
    ``'encoding'`` entry of its result is printed as ``<url> <- <encoding>``.

    Args:
        blogurl: URL of the page to inspect.

    Returns:
        1 if the page was downloaded and its encoding printed, 0 if the
        download failed (the error is printed instead of raised).
    """
    try:
        # The context manager closes the response even when read() raises;
        # keeping read() inside the try also reports a mid-transfer failure
        # through the same 0 return as a failed connection.
        with urllib.request.urlopen(blogurl) as fp:
            # In Python 3, read() returns the undecoded body as bytes.
            blog = fp.read()
    except Exception as e:
        # Script-level boundary: report any download failure and signal it
        # through the return value rather than a traceback.
        print(e)
        print('download exception-[%s]' %blogurl)
        return 0
    # Debug aid: writeFile("t.html", blog) dumps the raw page to disk.
    # Look up the name of the detected encoding.
    codedetect = chardet.detect(blog)['encoding']
    print('%s <- %s' %(blogurl, codedetect))
    return 1
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the URL to check.
    if len(sys.argv) != 1:
        # Run the detection and echo its 0/1 status code.
        print(blog_detect(sys.argv[1]))
    else:
        # No URL supplied: show how to invoke the script.
        print('usage:\npython DetectURLCoding.py http://xxx.com')
运行结果(此处脚本保存为 de.py):
D:\profile\Desktop>PYTHON de.py http://www.baidu.com
http://www.baidu.com <- utf-8
1
D:\profile\Desktop>PYTHON de.py http://photo.cankaoxiaoxi.com/roll10/2015/0318/709734.shtml
http://photo.cankaoxiaoxi.com/roll10/2015/0318/709734.shtml <- utf-8
1