Python 自动化在 Web 领域的应用
Python 在 Web 领域的自动化应用
Python 在 Web 自动化方面有着广泛的应用,以下是主要的应用场景和工具:
1. 网页测试自动化
Selenium - 功能测试
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


def test_login():
    """Drive a Chrome session through the example.com login form.

    Types fixed test credentials into the elements with ids ``username``
    and ``password``, clicks the submit button, then waits (10 second
    limit) for an element with id ``dashboard`` to be present before
    printing a success message. The browser is closed even when a step
    raises.
    """
    browser = webdriver.Chrome()
    try:
        browser.get("https://example.com/login")

        # Locate both credential fields first, then type into them.
        user_field = browser.find_element(By.ID, "username")
        pass_field = browser.find_element(By.ID, "password")
        user_field.send_keys("testuser")
        pass_field.send_keys("password123")

        # Submit the form.
        browser.find_element(By.XPATH, "//button[@type='submit']").click()

        # The dashboard element showing up is what counts as a successful login.
        dashboard_present = EC.presence_of_element_located((By.ID, "dashboard"))
        WebDriverWait(browser, 10).until(dashboard_present)
        print("登录测试成功!")
    finally:
        browser.quit()


# Run the test
test_login()
Playwright - 现代浏览器自动化
from playwright.sync_api import sync_playwright


def test_with_playwright():
    """Open example.com in Chromium, save a screenshot, and print the page title.

    The screenshot is written to ``screenshot.png`` in the current working
    directory. The browser is closed in a ``finally`` clause so that a
    failed navigation or screenshot does not skip the cleanup, matching the
    try/finally pattern used by the Selenium examples.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()

            # Navigate to the page
            page.goto("https://example.com")

            # Take a screenshot
            page.screenshot(path="screenshot.png")

            # Read the page title
            title = page.title()
            print(f"页面标题: {title}")
        finally:
            browser.close()
2. 网页数据抓取
BeautifulSoup + Requests
import requests
from bs4 import BeautifulSoup
import pandas as pd


def scrape_website(url):
    """Scrape news items from *url* and save them to ``scraped_data.csv``.

    Every ``<article class="news-item">`` contributes one row built from its
    ``<h2>`` text, the ``href`` of its first ``<a>`` and the ``datetime``
    attribute of its ``<time>`` element. Articles missing any of these are
    skipped instead of aborting the whole run.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``pandas.DataFrame`` with the columns ``title``, ``link`` and
        ``date`` (possibly empty). The same data is written to
        ``scraped_data.csv`` without the index column.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    # Without a timeout an unresponsive server would block this call forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx rather than scraping an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the data
    data = []
    for article in soup.find_all('article', class_='news-item'):
        heading = article.find('h2')
        anchor = article.find('a')
        stamp = article.find('time')
        if heading is None or anchor is None or stamp is None:
            continue  # incomplete markup: skip this article

        link = anchor.get('href')
        date = stamp.get('datetime')
        if link is None or date is None:
            continue  # element present but the attribute is missing

        data.append({
            'title': heading.text.strip(),
            'link': link,
            'date': date,
        })

    # Save to CSV; explicit columns keep the header row even when no rows matched.
    df = pd.DataFrame(data, columns=['title', 'link', 'date'])
    df.to_csv('scraped_data.csv', index=False)
    return df


# Usage example
scrape_website('https://news.example.com')
Scrapy - 专业爬虫框架
# Install: pip install scrapy
# Create a project: scrapy startproject myproject

# Example spider
import scrapy


class NewsSpider(scrapy.Spider):
    """Spider that yields one item per news article and follows pagination."""

    name = 'news'
    start_urls = ['https://news.example.com']

    def parse(self, response):
        """Yield a dict per ``article.news-item``, then a request for the next page.

        Each item carries the ``title``, ``link`` and ``date`` selected from
        the article. When an ``a.next-page`` link with a non-empty ``href``
        exists, a follow-up request handled by this same method is yielded.
        """
        for entry in response.css('article.news-item'):
            item = {
                'title': entry.css('h2::text').get(),
                'link': entry.css('a::attr(href)').get(),
                'date': entry.css('time::attr(datetime)').get(),
            }
            yield item

        # Pagination
        next_href = response.css('a.next-page::attr(href)').get()
        if next_href:
            yield response.follow(next_href, callback=self.parse)
3. API 测试自动化
Requests + Pytest
import requests
import pytest


class TestAPI:
    """HTTP-level checks against the users endpoints of the example API."""

    BASE_URL = "https://api.example.com"
    # Seconds to wait for the server; without a timeout an unresponsive API
    # would hang the test run instead of failing it.
    TIMEOUT = 10

    def test_get_users(self):
        """GET /users answers 200 with a JSON list."""
        response = requests.get(f"{self.BASE_URL}/users", timeout=self.TIMEOUT)
        assert response.status_code == 200
        assert isinstance(response.json(), list)

    def test_create_user(self):
        """POST /users answers 201 and echoes the submitted name."""
        data = {"name": "John", "email": "john@example.com"}
        response = requests.post(
            f"{self.BASE_URL}/users", json=data, timeout=self.TIMEOUT
        )
        assert response.status_code == 201
        assert response.json()["name"] == "John"


# Run: pytest test_api.py -v
4. 性能测试自动化
Locust - 负载测试
from locust import HttpUser, task, between


class WebsiteUser(HttpUser):
    """Simulated visitor that browses the home page, the product list, and logs in."""

    wait_time = between(1, 5)

    @task
    def view_homepage(self):
        """Request the site root."""
        self.client.get("/")

    @task(3)  # weight 3
    def view_products(self):
        """Request the product listing."""
        self.client.get("/products")

    @task
    def login(self):
        """Post the fixed test credentials to ``/login``."""
        credentials = {
            "username": "testuser",
            "password": "testpass",
        }
        self.client.post("/login", data=credentials)


# Run: locust -f locustfile.py
5. 部署自动化
Fabric - 远程部署
from fabric import Connection, task


@task
def deploy(c):
    """Deploy the application to the server.

    Opens a Fabric connection to ``user@server.com`` and runs the deployment
    commands below in order. The ``c`` argument supplied to the task is not
    used by this function.
    """
    remote_steps = (
        # Pull the latest code
        'cd /var/www/app && git pull',
        # Install dependencies
        'cd /var/www/app && pip install -r requirements.txt',
        # Apply database migrations
        'cd /var/www/app && python manage.py migrate',
        # Restart the service
        'sudo systemctl restart myapp',
    )
    with Connection('user@server.com') as server:
        for command in remote_steps:
            server.run(command)
    print("部署完成!")
6. 监控自动化
网站可用性监控
import requests
import time
import smtplib
from email.mime.text import MIMEText


def monitor_website(url, check_interval=300):
    """Poll *url* forever and send an alert e-mail whenever a check fails.

    A check fails when the request raises ``requests.RequestException`` or
    the response status code is not 200. A failure to deliver the alert is
    printed and otherwise ignored so that an unreachable mail server cannot
    stop the monitoring loop.

    Args:
        url: Address to check.
        check_interval: Seconds to sleep between two checks.

    This function never returns.
    """
    while True:
        problem = None
        try:
            response = requests.get(url, timeout=10)
        except requests.RequestException as e:
            problem = f"网站 {url} 无法访问: {str(e)}"
        else:
            if response.status_code != 200:
                problem = f"网站 {url} 返回状态码: {response.status_code}"

        if problem is not None:
            try:
                send_alert(problem)
            except (smtplib.SMTPException, OSError) as alert_error:
                # The mail server may be down for the same reason the site
                # is; keep monitoring instead of crashing.
                print(f"警报发送失败: {alert_error}")

        time.sleep(check_interval)


def send_alert(message):
    """Send *message* as an alert e-mail through ``smtp.example.com``.

    Args:
        message: Plain-text body of the e-mail.

    Raises:
        smtplib.SMTPException: If the SMTP dialogue fails.
        OSError: If the connection cannot be established or times out.
    """
    msg = MIMEText(message)
    msg['Subject'] = '网站监控警报'
    msg['From'] = 'monitor@example.com'
    msg['To'] = 'admin@example.com'

    # The timeout keeps a stalled mail server from blocking the caller forever.
    with smtplib.SMTP('smtp.example.com', 587, timeout=10) as server:
        server.starttls()
        # NOTE(review): placeholder credentials are hard-coded here; real
        # ones should come from configuration or the environment.
        server.login('user', 'password')
        server.send_message(msg)
7. 表单自动填写
自动化表单提交
from selenium import webdriver
from selenium.webdriver.common.by import By


def auto_fill_form(url, form_data):
    """Open *url* in Chrome, fill in the form fields, and submit the form.

    Args:
        url: Address of the page that contains the form.
        form_data: Mapping from a field's ``name`` attribute to the text to
            type into it. Each field is cleared before typing.

    The browser is closed even when a field or the submit button is missing.
    """
    browser = webdriver.Chrome()
    try:
        browser.get(url)

        # Fill in the form fields
        for name, text in form_data.items():
            field = browser.find_element(By.NAME, name)
            field.clear()
            field.send_keys(text)

        # Submit the form
        browser.find_element(By.XPATH, "//button[@type='submit']").click()
        print("表单提交成功!")
    finally:
        browser.quit()


# Usage example
form_data = {
    'username': 'testuser',
    'email': 'test@example.com',
    'message': '这是一条测试消息',
}
auto_fill_form('https://example.com/contact', form_data)
最佳实践
- 使用 headless 模式:节省资源,提高速度
- 添加延迟:避免被识别为机器人
- 错误处理:完善的异常处理机制
- 日志记录:记录自动化过程
- 遵守 robots.txt:尊重网站规则
- 使用代理:防止 IP 被封
这些工具和技术可以帮助你在 Web 领域实现各种自动化任务,从测试到部署,从数据抓取到监控。