简单爬虫-python简单爬虫

1,018次阅读

import sys
import urllib.request
import uuid

import pymysql
from pyquery import PyQuery as pq

# Open the MySQL connection and the cursor shared by the rest of the script.
# NOTE(review): the credentials are hard-coded here; consider loading them
# from configuration or the environment instead.
conn = pymysql.connect(
    host='127.0.0.1',
    user="root",
    passwd="123456",
    db="test",
    port=3306,
    charset="utf8",
)
cur = conn.cursor()

# Fetch every row of the `user` table and print each one.
cur.execute("select * from user")
users = cur.fetchall()
for user in users:
    print(user)

def get_content(page):
    """Download one listing page and return its source as text.

    Args:
        page: Page number; it is converted with ``str`` and appended to the
            ``page`` query parameter of the listing URL.

    Returns:
        The response body decoded as UTF-8.
    """
    url = ('https://saudi.souq.com/sa-en/mobile-phone-accessories/l/'
           '?rpp=32&_=1550499488459&sortby=sr&section=2&page=' + str(page))
    # The context manager closes the HTTP response even when reading or
    # decoding fails, so the connection is not leaked.
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8')

def get(html):
    """Select the link elements of a listing page.

    Args:
        html: Source of a listing page, as passed to ``pq``.

    Returns:
        The PyQuery selection of elements matching
        ``.img-link.quickViewAction.sPrimaryLink``.
    """
    doc = pq(html)
    return doc('.img-link.quickViewAction.sPrimaryLink')

# Crawl listing pages 1..2999: for every link found on a page, scrape the
# linked product page, insert one row into the `shop` table and append the
# link to job.txt.
# NOTE(review): the nesting below was reconstructed from a flattened listing;
# confirm that job.txt is meant to be written once per product link.
for j in range(1, 3000):
    print("正在爬取第" + str(j) + "页数据…")
    html = get_content(j)  # fetch the listing page source
    for item in get(html):
        prodouct_link = pq(item).attr('href')
        if not prodouct_link:
            # Without an href there is nothing to fetch or to record.
            continue

        # A product page that fails to load or parse must not stop the crawl,
        # but the failure is reported instead of being silently discarded.
        try:
            doc = pq(url=prodouct_link)
            title = doc('.product-title>h1').text()
            price = doc('.price.is.sk-clr1').text()
            stock = doc('.txtcolor-alert.xleft>span').text()
            shop_name = doc('.unit-seller-link>a>b').text()
            sales = doc('.show-for-medium.bold-text').text()
            image = doc('.img-bucket>img').attr("src")
            prodouct_id = str(uuid.uuid1())

            sql = ("insert into shop (product_id, product_name,product_link,"
                   "product_seller,product_price,product_sales,product_stock,"
                   "product_image) values (%s, %s, %s, %s,%s, %s, %s, %s)")
            try:
                count = cur.execute(
                    sql,
                    [prodouct_id, title, prodouct_link, shop_name,
                     price, sales, stock, image],
                )
                # Report whether the insert affected a row.
                if count > 0:
                    print("添加数据成功！\n")
                # Commit the transaction.
                conn.commit()
            except pymysql.MySQLError as err:
                # Drop the failed statement so later inserts start clean.
                conn.rollback()
                print("insert failed for %s: %s" % (prodouct_link, err),
                      file=sys.stderr)
        except Exception as err:
            # `Exception` (not a bare except) lets Ctrl-C still stop the crawl.
            print("skipping %s: %s" % (prodouct_link, err), file=sys.stderr)

        # Record the visited link whether or not the scrape succeeded.
        with open('job.txt', 'a', encoding='utf-8') as f:
            f.write(prodouct_link + '\n')

# Release the database resources: cursor first, then the connection.
cur.close()
conn.close()

神龙|纯净稳定代理IP免费测试

天启|企业级代理IP免费测试

IPIPGO|全球住宅代理IP免费测试

发表于：Python爬虫

2022-11-01

# Python爬虫

复制链接

赏

简单爬虫-python简单爬虫

相关文章：

HTTP代理设置详解：一步步配置指南

什么是Socks5代理IP及其优势

Socks5代理配置教程及注意事项

什么是代理服务器IP：如何选择合适的

国外代理服务器的优势及选择建议

如何找到可靠的免费代理服务器

在线代理服务器的使用与推荐

HTTP代理服务器的设置及应用实例

静态代理IP怎么填写：步骤与示例

海外静态IP的代理选择与配置