spidder / spidder.py
khulnasoft's picture
Create spidder.py
9a3880d verified
raw
history blame
518 Bytes
import requests
from bs4 import BeautifulSoup
# Scrape the text of every <div class="example-class"> element from a single
# page and store the results, one entry per line, in data.txt.

# Page to scrape.
url = "https://example.com"

# Upper bound (in seconds) on how long to wait for the server. Without an
# explicit timeout, requests may wait indefinitely on an unresponsive host.
REQUEST_TIMEOUT_SECONDS = 10

# Fetch the page and fail loudly on HTTP 4xx/5xx responses, so an error page
# is never parsed and saved as if it were real data.
response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
response.raise_for_status()

# Parse the raw bytes; BeautifulSoup works out the document encoding itself.
soup = BeautifulSoup(response.content, "html.parser")

# Collect the whitespace-trimmed text of each matching element.
data = [
    item.text.strip()
    for item in soup.find_all("div", class_="example-class")
]

# Write one entry per line. The encoding is pinned to UTF-8 so scraped
# non-ASCII text does not depend on the platform's default locale encoding.
with open("data.txt", "w", encoding="utf-8") as file:
    file.writelines(entry + "\n" for entry in data)