old kali网站下载链接爬取-Kali linux 全部版本镜像下载--Index of /kali-images
Kali Linux 全部版本镜像下载
目的
由于该网站不稳定,故爬取其全部下载链接,以便网友下载老版本 Kali。
from bs4 import BeautifulSoup
import requests
from urllib.parse import urljoin

# Root directory listing that the script crawls when run directly.
base_url = "http://old.kali.org/kali-images/"

# URLs of listing pages already fetched; shared by every crawl() call so a
# page is never requested twice (also breaks link cycles).
visited = set()

# Browser-like User-Agent sent with every request.
_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}

# Links whose href ends with one of these suffixes are recorded as downloads.
_DOWNLOAD_SUFFIXES = (".iso", ".torrent", ".txt", ".zsync", ".sha256sum")


def crawl(url, f, root=None):
    """Recursively walk a directory listing and record download links.

    Fetches ``url``, follows every link whose href ends in ``/`` as a
    sub-directory, and writes every link whose href ends in one of
    ``_DOWNLOAD_SUFFIXES`` to ``f``, one absolute URL per line.

    Args:
        url: Absolute URL of the listing page to fetch.
        f: Writable text file object; each found link is written and flushed
            immediately.
        root: URL prefix that bounds the crawl. Pages whose URL does not
            start with this prefix are skipped without being fetched.
            Defaults to ``url`` itself, i.e. the crawl stays inside the
            starting directory.

    Returns:
        None. Results are delivered through ``f`` and progress is printed.
        Request failures are reported on stdout and that page is skipped.
    """
    if root is None:
        root = url

    # Stay inside the requested subtree: any href ending in "/" is treated as
    # a directory, which would otherwise also follow parent-directory or
    # absolute links (if the listing contains them) and wander over the
    # whole host.
    if url in visited or not url.startswith(root):
        return
    visited.add(url)

    try:
        res = requests.get(url, headers=_HEADERS, timeout=10)
        res.raise_for_status()
    except requests.RequestException as e:
        print(f"Failed to access {url}: {e}")
        return

    soup = BeautifulSoup(res.text, "html.parser")
    for link in soup.find_all("a"):
        href = link.get("href")
        # Skip anchors without a target, query-only links and fragments.
        if not href or href.startswith(("?", "#")):
            continue

        full_url = urljoin(url, href)
        if href.endswith("/"):
            # Recurse into the sub-directory.
            crawl(full_url, f, root)
        elif href.endswith(_DOWNLOAD_SUFFIXES):
            print(f"Found: {full_url}")
            f.write(full_url + "\n")
            # Flush right away so links already found survive a crash.
            f.flush()


if __name__ == "__main__":
    with open("kali_download_links.txt", "w", encoding="utf-8") as f:
        crawl(base_url, f)
    print("\n已完成下载链接写入:kali_download_links.txt")
爬取页面
提取 下载链接 百度网盘