urllib.request.urlopen(url, data=None, [timeout, ]*, cafile=None, capath=None, cadefault=False, context=None)
import urllib.request

# Example 1: fetch a URL and print the raw payload.
url = 'http://httpbin.org/ip'
# The response object is a context manager; `with` closes the connection.
with urllib.request.urlopen(url) as response:
    html = response.read()  # read() returns bytes
print(html)

# Example 2: fetch a page and print it as text.
url = 'http://www.baidu.com'
with urllib.request.urlopen(url) as response:
    # decode() converts the bytes payload into a str
    html = response.read().decode('utf-8')
print(html)
发送 POST 数据
import urllib.parse
import urllib.request

url = 'http://httpbin.org/post'
data = {
    'name': "小明",
    'age': 30,
}
# urlencode() returns a str, but urlopen() only accepts bytes, an iterable of
# bytes, or a file object as POST data ("It cannot be of type str"), so the
# encoded form must be converted to bytes first.
# urllib.parse.urlencode(data).encode('utf-8') is an equivalent spelling.
data = bytes(urllib.parse.urlencode(data), encoding="utf-8")
# Passing `data` makes urlopen() issue a POST instead of a GET.
with urllib.request.urlopen(url, data=data) as response:
    html = response.read().decode('utf-8')
print(html)
设置timeout
import urllib.request

url = 'http://httpbin.org/get'
# `timeout` is in seconds and applies to the blocking network operations.
with urllib.request.urlopen(url, timeout=1) as response:
    html = response.read().decode('utf-8')
print(html)
import socket
import urllib.error
import urllib.request

url = 'http://httpbin.org/get'
try:
    # A deliberately tiny timeout (0.1 s) to provoke a timeout error.
    with urllib.request.urlopen(url, timeout=0.1) as response:
        html = response.read().decode('utf-8')
except urllib.error.URLError as e:
    # Failures while opening the connection arrive wrapped in URLError;
    # the underlying cause is available as e.reason.
    print("捕获异常....")
    print(e.reason)
    if isinstance(e.reason, socket.timeout):
        print("请求超时")
except socket.timeout:
    # A timeout that fires while reading the body may surface as a bare
    # socket.timeout instead of a URLError, so handle it separately.
    print("请求超时")
else:
    # Only print when the request and the read both succeeded.
    print(html)
响应
响应类型、状态码、响应头、实际获取的 URL
import urllib.request

url = 'http://www.python.org'
# Everything below only inspects response metadata, so keep it inside the
# `with` block and let the connection be closed on exit.
with urllib.request.urlopen(url) as response:
    # Response type
    response_type = type(response)
    print(response_type)  # <class 'http.client.HTTPResponse'>

    # Status code (`status` replaces the deprecated getcode())
    status_code = response.status
    print(status_code)

    # Reason phrase that goes with the status code
    status = response.reason
    print(status)  # e.g. OK for 200, Not Found for 404

    # Response headers
    response_headers = response.getheaders()  # returns a list
    print(response_headers)
    # getheader() fetches one named header
    server_type = response.getheader('Server')
    print(server_type)
    print(type(response.headers))  # <class 'http.client.HTTPMessage'>
    content_type = response.headers['Content-Type']  # read Content-Type
    print(content_type)

    # URL actually retrieved; can be used to detect a redirect
    # (`url` replaces the deprecated geturl())
    actual_url = response.url
    print(actual_url)
ProxyHandler(代理)
import urllib.request

# Mapping of protocol name -> proxy URL ("scheme://ip:port").
proxy_dict = {
    # The proxy address must be written as scheme://ip:port; the original
    # note warns that dropping the leading "http://" causes an error.
    'http': 'http://127.0.0.1:6688',
    'https': 'https://127.0.0.1:6688',
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36',
}

proxy_handler = urllib.request.ProxyHandler(proxy_dict)
opener = urllib.request.build_opener(proxy_handler)
# addheaders is a list of (name, value) tuples sent with every request made
# through this opener; materialise the dict view into a real list.
opener.addheaders = list(headers.items())
# After install_opener(), plain urllib.request.urlopen() uses this opener.
urllib.request.install_opener(opener)

url = 'http://www.whatismyip.com.tw/'  # target requested through the proxy
with urllib.request.urlopen(url) as response:
    print(response.read().decode('utf-8'))