python爬取微博设备显示怎么更改

745次阅读
没有评论
python爬取微博设备显示怎么更改

#coding:utf-8

from __future__ import (

print_function,

unicode_literals)

import requests

import re

import json

import base64

import time

import math

import random

from PIL import Image

# quote_plus lives in urllib.parse on Python 3 and directly in urllib on
# Python 2. Only a failed import should trigger the fallback, so catch
# ImportError rather than everything.
try:
    from urllib.parse import quote_plus
except ImportError:
    from urllib import quote_plus

# Login flow as described by the original author's note:
#   1. Base64-encode the user name and send a fake pre-login request, then
#      extract the parameters from the response.
#   2. Combine those parameters with the plaintext password, RSA-encrypt the
#      result and build the POST form data.
#   3. Send the forged login request with that form data.
#   4. Extract the redirect target link with a regular expression.
#   5. Keep the login alive by storing cookies in a session.
# NOTE(review): the code visible in this file posts the password as given and
# does not perform the RSA step from item 2 - confirm which flow is intended.

# Headers shared by every request; functions below overwrite individual
# entries (e.g. "Host") on this same dict before each call.
agent = (
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/55.0.2883.87 '
    'Safari/537.36 QIHU 360EE'
)
headers = {
    "Host": "passport.weibo.cn",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    'User-Agent': agent,
}

# A requests Session keeps certain parameters, such as cookies, across
# requests: everything sent through the same Session instance shares one
# cookie jar, and requests maintains it automatically, which takes care of
# the login cookies.
session = requests.Session()

# Visit the login page first (the response itself is not used).
index_url = "https://passport.weibo.cn/signin/login"
session.get(index_url, headers=headers)

def get_su(username):
    """Return the base64-encoded form of ``username``.

    The name is first URL-quoted with ``quote_plus`` and the quoted text is
    then base64-encoded; ``login_pre`` sends the result as the ``su``
    request parameter.

    :param username: account name as text.
    :return: base64 encoding of the URL-quoted name, as text (not bytes).
    """
    quoted = quote_plus(username)
    encoded = base64.b64encode(quoted.encode("utf-8"))
    return encoded.decode("utf-8")

def login_pre(username):
    """Send the pre-login request and, if asked to, collect a captcha answer.

    Side effects visible in this function: it overwrites ``Host`` and sets
    ``Referer`` on the module-level ``headers`` dict, and when a captcha is
    required it writes the image to ``capt.jpg`` in the working directory,
    opens it in an image viewer and reads the answer from standard input.

    :param username: account name; sent base64-encoded as the ``su`` parameter.
    :return: ``(captcha_text, pcid)`` when the response has ``showpin == 1``;
        otherwise ``""``. An empty pre-login response also yields ``""`` so
        that the result can always be handed straight to ``login``.
    """
    # JSONP-style callback name: millisecond timestamp plus a random offset.
    # int() keeps the sum an integer on Python 2, where math.floor returns
    # a float and str() would otherwise produce a float representation.
    callback_id = int(time.time() * 1000) + int(math.floor(random.random() * 100000))
    params = {
        "checkpin": "1",
        "entry": "mweibo",
        "su": get_su(username),
        "callback": "jsonpcallback" + str(callback_id),
    }
    pre_url = "https://login.sina.com.cn/sso/prelogin.php"
    headers["Host"] = "login.sina.com.cn"
    headers["Referer"] = index_url
    pre = session.get(pre_url, params=params, headers=headers)

    if pre.text == "":
        print("预登陆失败")
        # Return the same "no captcha" value as the normal path instead of
        # falling through to an implicit None, which login() cannot index.
        return ""

    js = pre.json()
    if js["showpin"] != 1:
        return ""

    # A captcha is required: download it, show it, and ask the user.
    headers["Host"] = "passport.weibo.cn"
    capt = session.get("https://passport.weibo.cn/captcha/image", headers=headers)
    capt_json = capt.json()
    # The image field carries "...base64,<payload>"; keep only the payload.
    capt_base64 = capt_json['data']['image'].split("base64,")[1]
    with open('capt.jpg', 'wb') as f:
        f.write(base64.b64decode(capt_base64))
    im = Image.open("capt.jpg")
    im.show()
    im.close()
    # The source text read "n>" here; the backslash of the newline escape
    # appears to have been lost, so it is restored.
    cha_code = input("请输入验证码\n>")
    return cha_code, capt_json['data']['pcid']

def login(username, password, pincode):
    """Post the login form, follow the cross-domain link and print the result.

    Uses the module-level ``session`` for every request and rewrites entries
    of the module-level ``headers`` dict (``Host``, ``Referer``, ``Origin``,
    ``Content-Type``) along the way. Output goes to standard output; nothing
    is returned.

    :param username: account name, posted as given.
    :param password: password, posted as given.
    :param pincode: value returned by ``login_pre``: either a
        ``(captcha_text, pcid)`` pair or an empty value when no captcha
        was requested.
    """
    postdata = {
        'username': username,
        'password': password,
        'savestate': '1',
        'ec': '0',
        'pagerefer': '',
        'entry': 'mweibo',
        'wentry': '',
        'loginfrom': '',
        'client_id': '',
        'code': '',
        'qq': '',
        'mainpageflag': '1',
        'hff': '',
    }
    # Any empty value ("" or None) means no captcha was requested.
    if pincode:
        postdata["pincode"] = pincode[0]
        postdata["pcid"] = pincode[1]

    headers["Host"] = "passport.weibo.cn"
    # The original set a "Reference" key, which is not an HTTP header;
    # "Referer" is the header name used elsewhere in this file.
    headers["Referer"] = index_url
    headers["Origin"] = "https://passport.weibo.cn"
    headers["Content-Type"] = "application/x-www-form-urlencoded"
    post_url = "https://passport.weibo.cn/sso/login"
    response = session.post(post_url, data=postdata, headers=headers)
    js = response.json()

    # Without these two fields none of the following steps can run, so
    # report the raw response instead of failing with a KeyError.
    data = js.get("data") or {}
    if "uid" not in data or "crossdomainlist" not in data:
        print("登录失败: %s" % js)
        return

    uid = data["uid"]
    crossdomain = data["crossdomainlist"]
    cn = "https:" + crossdomain["sina.com.cn"]
    # The original author's note: the "weibo.cn" and "weibo.com" entries of
    # crossdomain correspond to logging in to m.weibo.cn or weibo.com
    # instead, and the Host header must be changed to match.
    headers["Host"] = "login.sina.com.cn"
    session.get(cn, headers=headers)

    headers["Host"] = "weibo.cn"
    ht = session.get("http://weibo.cn/%s/info" % uid, headers=headers)
    # NOTE(review): the pattern was damaged in the source text (its markup
    # was stripped, leaving an unterminated string). It is reconstructed as
    # a page-title match - confirm against the original script.
    pa = r'<title>(.*?)</title>'
    res = re.findall(pa, ht.text)
    if res:
        # Drops the last three characters of the match; presumably a fixed
        # suffix after the user name - TODO confirm.
        print("模拟登录%s" % res[0][0:-3])
    else:
        print("未能从资料页解析出用户名")

    # Host must match the site being requested; the original still sent
    # "weibo.cn" here.
    headers["Host"] = "m.weibo.cn"
    home = session.get("https://m.weibo.cn/", headers=headers)
    print(home.text)

if __name__ == "__main__":
    # Placeholder credentials: replace both values before running.
    username = "你的账号"
    password = "你的密码"
    # The pre-login step yields the captcha data (if any) that login() needs.
    pincode = login_pre(username)
    login(username, password, pincode)

神龙|纯净稳定代理IP免费测试>>>>>>>>天启|企业级代理IP免费测试>>>>>>>>IPIPGO|全球住宅代理IP免费测试

相关文章:

版权声明:Python教程2022-10-28发表,共计3834字。
新手QQ群:570568346,欢迎进群讨论 Python51学习