"""A first simple web crawler.

The workflow of a Python crawler: fetch the web page, parse the web page,
store the data.
"""
from bs4 import BeautifulSoup
import requests
# Step 1: fetch the Baidu home page.
link = "https://www.baidu.com"
# A browser-like User-Agent is sent with the request.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.97 Safari/537.36 SE 2.X MetaSr 1.0'}
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(link, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page.
response.raise_for_status()
# When the Content-Type header carries no charset, requests falls back to
# ISO-8859-1, which garbles non-Latin text; use the encoding detected from
# the body in that case.
if response.encoding is None or response.encoding.lower() == "iso-8859-1":
    response.encoding = response.apparent_encoding

# Step 2: parse the page and extract the data we need (the <title> element).
soup = BeautifulSoup(response.text, "html.parser")
title = soup.find("title")
if title is None:
    # find() returns None when nothing matches; report that clearly rather
    # than failing later with an AttributeError on `.text`.
    raise ValueError("no <title> element found in " + link)
print(title.text)

# Step 3: append the title to a text file.
# An explicit encoding makes the output independent of the platform locale,
# and plain append mode is enough because the file is never read back.
with open('test.txt', "a", encoding="utf-8") as f:
    f.write(title.text)