# 获取小红书某个用户列表 (fetch the user list from a Xiaohongshu user search)
import time

from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
from playwright.sync_api import sync_playwright
# Cookies added to the browser context before the first navigation.
# Replace the empty list with your own cookie entries.
cookie = []
def capture_response(response, url_to_capture, api_response_data):
    """Collect the decoded JSON body of a matching response.

    Args:
        response: Response-like object exposing ``url``, ``headers`` and
            ``json()``.
        url_to_capture: Substring that must occur in ``response.url``.
        api_response_data: List that receives the decoded payload; it is
            mutated in place.

    Responses whose URL does not contain ``url_to_capture``, whose content
    type is not JSON, or whose body fails to decode are ignored.
    """
    if url_to_capture not in response.url:
        return

    content_type = response.headers.get('content-type', '')
    if 'application/json' not in content_type:
        return

    try:
        payload = response.json()
    except ValueError:
        # The body was advertised as JSON but does not parse; skip it
        # instead of raising from inside the event callback.
        return

    api_response_data.append(payload)
# Script body: open the search page, switch to the user tab, scroll until the
# end-of-list marker appears, then print every user found in the captured
# user-search API responses.

# Upper bound (seconds) on the scrolling phase, so the script cannot loop
# forever when the end-of-list marker never shows up.
max_scroll_seconds = 300

api_response_data = []
url_to_capture = 'https://edith.xiaohongshu.com/api/sns/web/v1/search/usersearch'

# The context manager stops Playwright on exit, including on errors.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    try:
        context = browser.new_context()
        context.add_cookies(cookie)
        page = context.new_page()

        # Every matching JSON response is appended to api_response_data.
        page.on(
            'response',
            lambda response: capture_response(response, url_to_capture, api_response_data),
        )

        page.goto("https://www.xiaohongshu.com/search_result/?keyword=%25E5%25B0%258F%25E4%25B9%258C%25E8%258B%258F&source=web_explore_feed&type=51")
        page.wait_for_load_state('load')

        # Start waiting before the click so the response it triggers cannot
        # arrive unnoticed between the click and the wait.
        with page.expect_response(lambda response: url_to_capture in response.url):
            page.locator('//div[text()=" 用户"]').click()

        deadline = time.monotonic() + max_scroll_seconds
        while time.monotonic() < deadline:
            page.mouse.wheel(0, 1000)
            try:
                page.wait_for_selector('//div[text()=" - THE END - "]', timeout=50)
            except PlaywrightTimeoutError:
                # Marker not present yet: keep scrolling. Any other error
                # propagates instead of being retried forever.
                print("继续下拉")
                continue
            print("下拉结束")
            break
        else:
            # Deadline reached without seeing the end-of-list marker.
            print("下拉超时")

        page.wait_for_load_state('load')
        print(len(api_response_data))
    finally:
        browser.close()

for payload in api_response_data:
    # Payloads without a data/users section are skipped; presumably these are
    # error or empty responses - verify against the API.
    users = (payload.get('data') or {}).get('users') or []
    for user in users:
        # .get keeps one entry with a missing field from aborting the dump.
        print(user.get('name', ''))
        print(user.get('sub_title', ''))
        print(user.get('image', ''))
        print("====================================")