urllib.request模块
- 模拟浏览器构建一个Get请求爬取网页
import urllib.request

# httpbin's /get endpoint echoes back the details of a GET request.
# (/post only accepts POST, so a plain GET sent there is rejected with 405.)
url = 'https://httpbin.org/get'

# Calling urlopen() without a data argument sends a GET request and returns
# an HTTPResponse object; the with-block closes the connection when done.
with urllib.request.urlopen(url) as response:
    # Read the raw body bytes, decode them as UTF-8 and print the page text.
    print(response.read().decode('utf-8'))
import socket
import urllib.error
import urllib.parse
import urllib.request

url = 'https://httpbin.org/post'
form = {'name': 'liu'}

# Serialise the dict into a URL-encoded string: the same "key=value&..."
# form that follows the '?' in a GET URL.
query = urllib.parse.urlencode(form)

# urlopen() requires the request body to be bytes, so encode the string.
data = query.encode('utf-8')

try:
    # Passing data turns the request into a POST.
    # timeout=2 aborts the request if the server does not answer in 2 seconds.
    with urllib.request.urlopen(url, data=data, timeout=2) as response:
        print(response.read().decode('utf-8'))
except socket.timeout:
    # A timeout while waiting for or reading the response can propagate
    # directly instead of being wrapped in URLError.
    print('TIME OUT')
except urllib.error.URLError as e:
    # A timeout while connecting is wrapped in URLError; the underlying
    # socket timeout is available as e.reason.
    if isinstance(e.reason, socket.timeout):
        print('TIME OUT')
    else:
        # Report other failures (DNS errors, HTTP error statuses, ...)
        # instead of silently ignoring them.
        print('REQUEST FAILED:', e.reason)
import urllib.parse
import urllib.request

url = 'https://httpbin.org/post'

# Request headers sent along with the request.
headers = {
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
    'Host': 'httpbin.org',
}

# URL-encode the form fields and convert them to bytes for the request body.
form = {'name': 'liu'}
data = urllib.parse.urlencode(form).encode('utf-8')

# Build a Request object from the url, the body (must be bytes), the headers
# and the HTTP method, then send it with urlopen().
request = urllib.request.Request(url, data=data, headers=headers, method='POST')

# The with-block closes the connection once the body has been read.
with urllib.request.urlopen(request) as response:
    print(response.read().decode('utf-8'))
其他相关文章推荐
神龙|纯净稳定代理IP免费测试>>>>>>>>天启|企业级代理IP免费测试>>>>>>>>IPIPGO|全球住宅代理IP免费测试