京东商品页面爬取
# Fetch a JD.com product page and print its HTML.
# Imported here because nothing above this snippet brings `requests` into
# scope; without it the call below raises NameError.
import requests

url = "https://item.jd.com/5821455.html"
try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, timeout=10)
    # Turn 4xx/5xx responses into an exception instead of printing an error page.
    r.raise_for_status()
    # Decode with the encoding detected from the body content.
    r.encoding = r.apparent_encoding
    print(r.text)
except requests.RequestException:
    # Connection errors, timeouts and HTTP error statuses all land here;
    # programming errors and Ctrl-C are no longer silently swallowed.
    print("爬取失败")
亚马逊商品页面爬取
import requests
# Fetch an Amazon.cn product page and print characters 1000-2000 of its HTML.
url = "https://www.amazon.cn/dp/B077V5Q9L2?_encoding=UTF8&ref_=pc_cxrd_658407051_bestTab_658407051_a_best_1&pf_rd_p=797efab3-8794-41b4-a33a-eca4c6cf5ae1&pf_rd_s=merchandised-search-5&pf_rd_t=101&pf_rd_i=658407051&pf_rd_m=A1AJ19PSB66TGU&pf_rd_r=1GD0NPS97SJE6EHCK2K7&pf_rd_r=1GD0NPS97SJE6EHCK2K7&pf_rd_p=797efab3-8794-41b4-a33a-eca4c6cf5ae1"
# Override the default User-Agent header with a browser-like value.
# NOTE(review): presumably the site rejects the library's default
# User-Agent -- confirm against the live site.
kv = {"user-agent": 'Mozilla/5.0'}
try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, headers=kv, timeout=10)
    # Turn 4xx/5xx responses into an exception instead of printing an error page.
    r.raise_for_status()
    # Decode with the encoding detected from the body content.
    r.encoding = r.apparent_encoding
    print(r.text[1000:2000])
except requests.RequestException:
    # Connection errors, timeouts and HTTP error statuses all land here;
    # programming errors and Ctrl-C are no longer silently swallowed.
    print("爬取失败")
百度搜索引擎关键字提交接口
百度关键词接口:
https://www.baidu.com/s?wd=关键词
# Submit a keyword to Baidu search and print the status code, the final
# request URL and the length of the returned page.
import requests

# `params` is URL-encoded by requests and appended as the ?wd=... query string.
kv = {'wd': '金刚狼'}
url = "https://www.baidu.com/s"
try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, params=kv, timeout=10)
    # The status code is printed rather than raised on, so error responses
    # are still reported with their URL and body length.
    print(r.status_code)
    print(r.url)
    print(len(r.text))
except requests.RequestException:
    # Only network-level failures are reported here; programming errors
    # and Ctrl-C are no longer silently swallowed.
    print("爬取失败")
网络图片的爬取和存储
import os

import requests

# --- Script 1: download an image and store it under its remote file name ---
url = "https://images-cn.ssl-images-amazon.com/images/I/51zQ8gQpDHL._SX350_BO1,204,203,200_.jpg"
root = "/Users/apple/Desktop/"
# The last URL path segment is used as the local file name.
path = os.path.join(root, url.split('/')[-1])
try:
    # Creates missing parent directories too and is a no-op if root exists.
    os.makedirs(root, exist_ok=True)
    if not os.path.exists(path):
        # A timeout keeps the script from hanging on a stalled connection.
        r = requests.get(url, timeout=10)
        # Fail before opening the file so an HTTP error page is never
        # written to disk under a .jpg name.
        r.raise_for_status()
        # The with-statement closes the file; no explicit close() is needed.
        with open(path, "wb") as f:
            f.write(r.content)
        print("文件保存成功")
    else:
        print("文件已存在")
except (requests.RequestException, OSError):
    # Network failures, HTTP error statuses and filesystem errors are
    # reported; programming errors and Ctrl-C are no longer swallowed.
    print("爬取失败")

# --- Script 2: query ip138 for an IP address and print the page tail ---
url = "http://m.ip138.com/ip.asp?ip="
try:
    # The IP address is appended directly to the query string.
    r = requests.get(url + "196.168.6.39", timeout=10)
    r.raise_for_status()
    # Decode with the encoding detected from the body content.
    r.encoding = r.apparent_encoding
    # Only the last 500 characters of the page are printed.
    print(r.text[-500:])
except requests.RequestException:
    print("爬取失败")
神龙|纯净稳定代理IP免费测试>>>>>>>>天启|企业级代理IP免费测试>>>>>>>>IPIPGO|全球住宅代理IP免费测试